
****************************
* Media Influence and Spatial Voting: The Role of Perceived Party Positions
* Political Behavior
* Lucas Paulo da Silva
* (Based on code from Florian Foos and Daniel Bischof)
* Do-File for preparing dataset
* 28/02/2025
****************************

/*
--------------------------------------------------------------------------------
This code merges and recodes the BSA data. 

The BSA changes its variables, and their names and labels, over time. 
Thus, each survey year has to be cleaned by hand. 

Much of this code is similar or identical to the do-files created by Foos and Bischof (2022) in their study "Tabloid Media Campaigns and Public Opinion: Quasi-Experimental Evidence on Euroscepticism in England". I combined several of their do-files and made changes to suit my study. This included adding new variables, recoding variables in different ways, and removing elements that are not needed for my study. Significant changes for my study are indicated by "MISV" (Media Influence and Spatial Voting) in the section titles below. The do-files from Foos and Bischof (2022) can be found here: https://doi.org/10.7910/DVN/NYPOQD
--------------------------------------------------------------------------------
*/

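* Written for Stata 16.1. This line is only a comment: to actually pin the interpreter version (in case Stata's syntax changes in future versions), run "version 16.1" at the top of the do-file.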





********************************************************************************
* prepare_BSA
********************************************************************************
	
// ---------- 83-91 cumulative file: ----------
	// Load the 1983-91 cumulative file; the statements of this section all operate on it
	// until the cleaned data are written to the 83_91_clean tempfile.
	use "C:\your\path\to\bsa83-91_2955.dta", clear  // replace with your file path

	// keep only relevant variables:
	keep rocccode rsoc dateint dateintd pano eecimm whpaper eec year region stregion srinc rage rsex partyid1 partyid2 rsocclas raceorig hedqual rghclass readpap religion rghgrp confeel labfeel abort1 ecohelpc incomgap confeel labfeel redistrb tuinf2 tupower homosex sexhomo prejblk asianimm conxtrme labxtrme union unionsa tenure2 tea ethnicgp

	*clean date: 
	*Issue is that for a lot of respondents we do not observe the day of the interview, we handcode this in the next preparation file. Luckily not many are missing for 1989 -- the treatment year. 
	// h1: dateint written as a string, zero-padded to at least six characters
	gen h1 = string(dateint, "%06.0f") 
	// h2 and h3: the two characters that start four positions from the end of h1, as a number
	// (the month field, judging by the manual corrections further down); 99 is set to missing.
	// Both use the same expression and differ only in which side of 100000 dateint lies on.
	gen h2 = real(substr(h1, -4, 2)) if dateint<100000
	replace h2=. if h2==99
	gen h3 = real(substr(h1, -4, 2)) if dateint>100000
	replace h3=. if h3==99
	
	// clean some of the strangely coded interview dates which are certainly not in the correct order:
	// m = interview month. A dateint of exactly 100000 satisfies neither condition, so m stays missing.
	gen m=.
	replace m=h2 if dateint<100000 
	replace m=h3 if dateint>100000 
	// explicit month for the date codes that start with 99 (pattern 99MMYY)
	replace m=3 if dateint==990389 
	replace m=4 if dateint==990489
	replace m=4 if dateint==990491
	replace m=5 if dateint==990589
	replace m=6 if dateint==990691
	replace m=7 if dateint==990789
	replace m=8 if dateint==990889
	
	// y = interview year: last two digits of dateint plus 1900
	gen h4=mod(dateint, 100)
	gen y=h4+1900
	// explicit year for the date codes that start with 99
	replace y=1989 if dateint==999989
	replace y=1991 if dateint==999991
	replace y=1989 if dateint==990389 
	replace y=1989 if dateint==990489 
	replace y=1989 if dateint==990589 
	replace y=1989 if dateint==990789 
	replace y=1989 if dateint==990889 
	replace y=1991 if dateint==990491
	replace y=1991 if dateint==990691
	
	// d = interview day taken from dateintd; code 99 is set to missing
	gen d=dateintd
	replace d=. if dateintd==99 
	
	// the h* helpers are scratch variables only
	drop h1 h2 h3 h4
	
	// idate: daily interview date built from m, d and y; imonth: monthly date built from y and m
	gen idate = mdy(m, d, y)
	format idate %d
	gen imonth = ym(y, m)
	format imonth %tm

	// Newspaper readership dummy: 1 when the raw item equals 1, missing when it equals 9,
	// and 0 in every other case (including a missing raw item).
	rename readpap raw_readpap
	generate readpap = cond(raw_readpap == 9, ., raw_readpap == 1)
	label variable readpap "r reads newpaper 3+times per week"
	drop raw_readpap

	// Shorter names for education, social class and sex.
	rename (hedqual rsocclas rsex) (edu sclass gender)

	// Constituency string from the value label of pano. Applied innermost first:
	// remove blanks, write "&" as "and", expand "l'" to "liver", remove full stops.
	decode pano, generate(constituency)
	replace constituency = subinstr(subinstr(subinstr(subinstr(constituency, " ", "", .), "&", "and", .), "l'", "liver", .), ".", "", .)

	rename (eecimm whpaper) (EUimmigration news)

	// voteleave: 1 when eec equals 2, missing when eec is . or 9, otherwise 0.
	// voteleave2 is coded identically in this file.
	generate voteleave = cond(eec == . | eec == 9, ., eec == 2)
	generate voteleave2 = voteleave

	// leave_dk: 1 when eec equals 8, 0 when eec is 7 or lower, missing otherwise.
	generate leave_dk = (eec == 8) if eec <= 7 | eec == 8

	// Occupation codes are copied under two names; const is an empty placeholder.
	generate occupation = rsoc
	generate occupation_b1990 = rocccode
	generate const = .

	
***** MISV variables *****

*** party support

* disliking conservatives (conhate) and disliking labour (labhate)
* Codes 1-7 of confeel / labfeel are mapped onto the seven scores listed below;
* every other code (0, 8 and above, missing) leaves the new variable missing.
	local hate_scale 0 0.16667 0.33334 0.5 0.66668 0.83335 1
	foreach pty in con lab {
		generate `pty'hate = .
		forvalues code = 1/7 {
			local score : word `code' of `hate_scale'
			replace `pty'hate = `score' if `pty'feel == `code'
		}
	}

* party identification (0/1 dummies; a missing partyid2 gives 0)
	generate labour = (partyid2 == 2)
	generate conservative = (partyid2 == 1)

	rename partyid2 party


*** voter position

* redistribution: codes 1-5 placed on 0, .25, .5, .75, 1
	generate redist_lr = (redistrb - 1)/4 if inlist(redistrb, 1, 2, 3, 4, 5)

* trade unions (binary): first from tuinf2, then overwritten wherever tupower is 1-5
	generate unions_lr = (tuinf2 == 1) if inlist(tuinf2, 1, 2, 3)
	replace unions_lr = inlist(tupower, 1, 2) if inlist(tupower, 1, 2, 3, 4, 5)

* trade unions (scale): tuinf2 1/2/3 gives 1/.5/0, then tupower 1-5 gives 1, .75, .5, .25, 0
	generate unions_lr_sc = (3 - tuinf2)/2 if inlist(tuinf2, 1, 2, 3)
	replace unions_lr_sc = (5 - tupower)/4 if inlist(tupower, 1, 2, 3, 4, 5)

* homosexuality: first from homosex (1, 2, 3, 6 give 1; 4, 5 give 0),
* then overwritten wherever sexhomo is 1-4 (1, 2 give 1; 3, 4 give 0)
	generate gay_lr = inlist(homosex, 1, 2, 3, 6) if inlist(homosex, 1, 2, 3, 4, 5, 6)
	replace gay_lr = inlist(sexhomo, 1, 2) if inlist(sexhomo, 1, 2, 3, 4)

* opinion on asian immigration: code 2 gives 1; codes 1 and 3 give 0
	generate asianimmig_lr = (asianimm == 2) if inlist(asianimm, 1, 2, 3)


*** party position

* perceiving the conservatives / labour as extreme: code 1 gives 1, code 2 gives 0
	foreach pty in con lab {
		generate `pty'extr_p = (`pty'xtrme == 1) if inlist(`pty'xtrme, 1, 2)
	}

	
*** control variables

* religion (3 groups): 0 = none, 1 = christian, 2 = other; codes not listed below stay missing
	rename religion h1
	gen religion=.
	*none
	replace religion=0 if h1==1  
	*christian
	replace religion=1 if h1>=2 & h1<=8  
	replace religion=1 if h1>=21 & h1<=27
	*other
	replace religion=2 if h1>=9 & h1<=14
	
* religion (more categories): same source codes, with catholic (code 3) split out
	gen religion_cat=.
	*none
	replace religion_cat=0 if h1==1 
	*christian, non-catholic
	replace religion_cat=1 if h1==2
	replace religion_cat=1 if h1>=4 & h1<=8  
	replace religion_cat=1 if h1>=21 & h1<=27
	*catholic
	replace religion_cat=2 if h1==3 
	*other
	replace religion_cat=3 if h1>=9 & h1<=14
	drop h1 
	
* age (codes 98 and above are set to missing)
	rename rage age
	replace age=. if age >= 98
	
* union: dummy from the original union item (1 gives 1, 2 gives 0), overwritten by
* unionsa wherever unionsa is 1 or 2 (gives 1) or 3 (gives 0)
	rename union h1
	gen union = .
	replace union=1 if h1==1
	replace union=0 if h1==2
	replace union=1 if unionsa==1
	replace union=1 if unionsa==2
	replace union=0 if unionsa==3
	drop h1
	
* ethnicity
* white = 1 for raceorig codes 7-9 from 1989 onwards, or for ethnicgp code 1 up to 1988;
* 0 otherwise (including respondents whose ethnicity item is missing).
* Stata evaluates & before |, so the year restriction must be applied to the whole set of
* raceorig codes. Chained after the last | alternative it would only restrict code 9.
	gen white=0
	replace white=1 if inlist(raceorig, 7, 8, 9) & year>=1989
	replace white=1 if ethnicgp==1 & year<=1988
	
* home (1 if tenure2 equals 1, else 0)
	gen ownhome=0
	replace ownhome=1 if tenure2==1

* education: tea with codes 6 and 7 merged into 6 and codes 8 and above set to missing
	gen education=tea
	replace education=6 if tea==6 | tea==7
	replace education=. if tea>=8

	
	tempfile 83_91_clean
	save `83_91_clean'

	
	
	
// ---------- 92 file: ----------
	use "C:\your\path\to\bsa92_bes92e.dta", clear

	// The survey year is constant in this file; rows with a missing arfi code are dropped.
	generate year = 1992
	drop if arfi == .

	// Constituency string from the arfi value label: blanks removed, "&" written as "and",
	// full stops removed (applied innermost first).
	decode arfi, generate(constituency)
	replace constituency = subinstr(subinstr(subinstr(constituency, " ", "", .), "&", "and", .), ".", "", .)

	// Interview month: the single character three positions from the end of the
	// zero-padded v925f string; the code 999992 gives a missing month.
	generate m = real(substr(string(v925f, "%06.0f"), -3, 1)) if string(v925f, "%06.0f") != "999992"
	generate imonth = ym(year, m)
	format imonth %tm

	// Newspaper readership dummy from v2a: 1 when v2a equals 1, missing when it equals 9,
	// 0 in every other case. The raw item is removed afterwards.
	rename v2a raw_readpap
	generate readpap = cond(raw_readpap == 9, ., raw_readpap == 1)
	label variable readpap "r reads newpaper 3+times per week"
	drop raw_readpap

	// Names shared with the other survey years.
	rename (hedqual rsocclas v915b v2b_1 v29a) (edu sclass gender news ecpolicy)

	// voteleave: 1 for ecpolicy code 1; voteleave2: 1 for codes 1 or 2.
	// Both are missing when ecpolicy is ., 9 or -2, and 0 otherwise.
	generate voteleave = cond(ecpolicy == . | inlist(ecpolicy, 9, -2), ., ecpolicy == 1)
	generate voteleave2 = cond(ecpolicy == . | inlist(ecpolicy, 9, -2), ., inlist(ecpolicy, 1, 2))

	// leave_dk: 1 when ecpolicy equals 8, 0 when it is 7 or lower, missing otherwise.
	generate leave_dk = (ecpolicy == 8) if ecpolicy <= 7 | ecpolicy == 8
	
	

***** MISV variables *****

*** party support

* disliking conservatives (v14a) and disliking labour (v14b):
* codes 1-5 placed on 0, .25, .5, .75, 1; other codes stay missing
	generate conhate = (v14a - 1)/4 if inlist(v14a, 1, 2, 3, 4, 5)
	generate labhate = (v14b - 1)/4 if inlist(v14b, 1, 2, 3, 4, 5)

* party identification (0/1 dummies; a missing partyid gives 0)
	generate labour = (partyid == 2)
	generate conservative = (partyid == 1)


*** voter position

* redistribution (v47a): codes 1-5 placed on 0, .25, .5, .75, 1
	generate redist_lr = (v47a - 1)/4 if inlist(v47a, 1, 2, 3, 4, 5)

* trade unions (v51a): code 1 gives 1, code 2 gives 0
	generate unions_lr = (v51a == 1) if inlist(v51a, 1, 2)

* trade unions (scale): as above, with code 8 placed at the midpoint .5
	generate unions_lr_sc = cond(v51a == 8, .5, v51a == 1) if inlist(v51a, 1, 2, 8)

* homosexuality (v205d): codes 1-3 give 1, codes 4-5 give 0
	generate gay_lr = inlist(v205d, 1, 2, 3) if inlist(v205d, 1, 2, 3, 4, 5)


*** party position

* thinking the conservatives are extreme (v16a): code 1 gives 1, code 2 gives 0
	generate conextr_p = (v16a == 1) if inlist(v16a, 1, 2)

* thinking labour is extreme (v16b): code 1 gives 1, code 2 gives 0
	generate labextr_p = (v16b == 1) if inlist(v16b, 1, 2)

	
	
*** control variables

* Stata treats missing values as larger than any number, so an open-ended condition such as
* x>=12 is also true when x is missing. The open-ended recodes below therefore exclude
* missing values explicitly; otherwise respondents with no answer would be classified as
* "other religion" and as education category 5.

* religion (3 groups): 0 = none, 1 = christian, 2 = other
	gen religion=.
	*none
	replace religion=0 if v916==0  
	*christian
	replace religion=1 if v916>=1 & v916<=11 
	*other
	* NOTE(review): every non-missing code from 12 upwards counts as other; if v916 uses high
	* codes such as 98/99 for don't know / not answered, they should be set to missing - confirm.
	replace religion=2 if v916>=12 & !missing(v916)
	
* religion (more categories): catholic (code 2) split out from the christian group
	gen religion_cat=.
	*none
	replace religion_cat=0 if v916==0 
	*christian, non-catholic
	replace religion_cat=1 if v916==1
	replace religion_cat=1 if v916>=3 & v916<=11  
	*catholic
	replace religion_cat=2 if v916==2 
	*other
	replace religion_cat=3 if v916>=12 & !missing(v916)
	
* age (codes 98 and above are set to missing)
	rename v915c age 
	replace age = . if age >= 98
	
* union: 1 for v903c codes 1 or 2, missing for code 9, 0 otherwise
* (a missing v903c is therefore coded 0)
	gen union = 0
	replace union=1 if v903c==1
	replace union=1 if v903c==2
	replace union=. if v903c==9
	
* ethnicity (1 for v915a codes 7-9, else 0)
	gen white=0
	replace white=1 if v915a==7 | v915a==8 | v915a==9
	
* home (1 if tenure2 equals 1, else 0)
	gen ownhome=0
	replace ownhome=1 if tenure2==1

* education from v911: up to 15 gives 1, 16 gives 2, 17 gives 3, 18 gives 4,
* 19 and above gives 5, and code 97 is then recoded to 6
* NOTE(review): non-missing codes 98/99, if used for don't know / not answered, still fall
* into category 5 - confirm against the codebook.
	gen education=.
	replace education=1 if v911==00
	replace education=1 if v911<=15
	replace education=2 if v911==16
	replace education=3 if v911==17
	replace education=4 if v911==18
	replace education=5 if v911>=19 & !missing(v911)
	replace education=6 if v911==97
	
	keep m imonth year constituency religion religion_cat age readpap edu sclass partyid labour conservative gender ragecat news ecpolicy voteleave voteleave2 leave_dk conhate labhate redist_lr unions_lr unions_lr_sc gay_lr conextr_p labextr_p religion age union white ownhome education  
	
	
	tempfile 92_clean
	save `92_clean'


	
	
// ---------- 93 file: ----------
	use "C:\your\path\to\bsa93.dta", clear
	
	// replace the year variable shipped with the file by the survey year
	drop year
	gen year=1993
	
	// Interview month: the single character five positions from the end of the date string;
	// the all-9s code gives a missing month.
	// NOTE(review): only one digit is read, so a two-digit month is reduced to its last
	// digit (see the manual fix for November below) - confirm the fieldwork months.
	gen h1 = string(dateint, "%06.0f") 
	gen h2 = real(substr(h1, -5, 1)) 
	replace h2=. if dateint==99999999
	
	gen m=h2
	*one interview date is misspecified: 
	replace m = 11 if m ==1 
	gen imonth = ym(year, m)
	format imonth %tm
	drop h1 h2
	
	drop if region==1 | region==4
	
	/*
	We (Foos and Bischof) needed to handcode the constituencies from the postcodes. 
	We did so by checking by hand into which 1997 constituencies the postcodes fall. 
	If they fall into more than one we used the one with most overlap:
	*/
	// keep(master match): postcode sectors that have no respondent in memory (for example the
	// sectors of the regions dropped just above) must not be appended as empty respondent rows.
	// nogenerate: the _merge indicator is not needed; the keep statement below would drop it anyway.
	// (The scratch copy of sector that used to be created and discarded here was never used
	// and has been removed.)
	merge m:1 sector using  "C:\your\path\to\1993_postcodes.dta", keep(master match) nogenerate
	
	keep rsoc dateint m imonth year whpaper ecpolicy constituency region stregion srinc rage rsex partyid1 rrgclass raceorig hedqual readpap rghgrp srgclass partyid2 redistrb homosex religion unionsa raceorig tenure2 tea
	
	// Social class comes from srgclass in this year; code 6 is folded into code 5.
	rename srgclass sclass
	replace sclass = 5 if sclass == 6

	// Newspaper readership dummy: 1 when the raw item equals 1, missing when it equals 9,
	// 0 in every other case.
	rename readpap raw_readpap
	generate readpap = cond(raw_readpap == 9, ., raw_readpap == 1)
	label variable readpap "r reads newpaper 3+times per week"
	drop raw_readpap

	rename (hedqual rsex whpaper) (edu gender news)
	generate occupation = rsoc

	// newspaper code -9 is treated as missing
	replace news = . if news == -9

	// voteleave: 1 for ecpolicy code 1; voteleave2: 1 for codes 1 or 2.
	// Both are missing when ecpolicy is ., 9 or -2, and 0 otherwise.
	generate voteleave = cond(ecpolicy == . | inlist(ecpolicy, 9, -2), ., ecpolicy == 1)
	generate voteleave2 = cond(ecpolicy == . | inlist(ecpolicy, 9, -2), ., inlist(ecpolicy, 1, 2))

	// leave_dk: 1 when ecpolicy equals 8, 0 when it is 7 or lower, missing otherwise.
	generate leave_dk = (ecpolicy == 8) if ecpolicy <= 7 | ecpolicy == 8

	// Full interview date: dateint as text, left-padded with a zero when it has seven
	// characters, then parsed in day-month-year order.
	generate dmy_txt = dateint
	tostring dmy_txt, replace format(%20.0f)
	replace dmy_txt = "0" + dmy_txt if length(dmy_txt) == 7
	generate date = date(dmy_txt, "DMY")
	format date %td
	generate month = month(date)
	generate day = day(date)
	generate const = .
	drop dmy_txt
	
	
	
***** MISV variables *****

*** party support

* party identification (0/1 dummies; a missing partyid2 gives 0)
	generate labour = (partyid2 == 2)
	generate conservative = (partyid2 == 1)

	rename partyid2 party


*** voter position

* redistribution: codes 1-5 placed on 0, .25, .5, .75, 1
	generate redist_lr = (redistrb - 1)/4 if inlist(redistrb, 1, 2, 3, 4, 5)

* homosexuality: codes 1, 2, 3 and 6 give 1; codes 4 and 5 give 0
	generate gay_lr = inlist(homosex, 1, 2, 3, 6) if inlist(homosex, 1, 2, 3, 4, 5, 6)



*** control variables

* religion (3 groups): 0 = none, 1 = christian, 2 = other; unlisted codes stay missing
	rename religion raw_relig
	generate religion = .
	replace religion = 0 if raw_relig == 1
	replace religion = 1 if inrange(raw_relig, 2, 8) | inrange(raw_relig, 21, 27)
	replace religion = 2 if inrange(raw_relig, 9, 14)

* religion (more categories): 0 = none, 1 = christian non-catholic, 2 = catholic, 3 = other
	generate religion_cat = .
	replace religion_cat = 0 if raw_relig == 1
	replace religion_cat = 1 if raw_relig == 2 | inrange(raw_relig, 4, 8) | inrange(raw_relig, 21, 27)
	replace religion_cat = 2 if raw_relig == 3
	replace religion_cat = 3 if inrange(raw_relig, 9, 14)
	drop raw_relig

* age
* NOTE(review): the cut-off here is 97, whereas the other survey years use 98 - confirm
* that this difference is intended.
	rename rage age
	replace age = . if age >= 97

* union: unionsa codes 1, 2 and 4 give 1; code 3 gives 0; anything else stays missing
* NOTE(review): code 4 is counted as 1 only in this year - confirm against the codebook.
	generate union = (unionsa != 3) if inlist(unionsa, 1, 2, 3, 4)

* ethnicity (1 for raceorig codes 7-9, else 0)
	generate white = inlist(raceorig, 7, 8, 9)

* home (1 if tenure2 equals 1, else 0)
	generate ownhome = (tenure2 == 1)

* education: tea with codes 6 and 7 merged into 6; codes 8 and above become missing
	generate education = cond(inlist(tea, 6, 7), 6, tea) if tea < 8


	tempfile 93_clean
	save `93_clean'

	
	
	
	
// ---------- 94 file: ----------
	use "C:\your\path\to\bsa94.dta", clear

	generate year = 1994

	// Constituency string from the censparl value label: blanks removed, "&" written as
	// "and", full stops removed (applied innermost first).
	decode censparl, generate(constituency)
	replace constituency = subinstr(subinstr(subinstr(constituency, " ", "", .), "&", "and", .), ".", "", .)

	// Interview month: the single character five positions from the end of the date string;
	// the codes 99999999 and 99999998 give a missing month.
	generate m = real(substr(string(dateint, "%06.0f"), -5, 1)) if !inlist(dateint, 99999999, 99999998)
	generate imonth = ym(year, m)
	format imonth %tm

	keep rsoc dateint m imonth eecimm whpaper ecpolicy year constituency region stregion srinc rage rsex partyid1 rsocclas raceorig hedqual rghclass readpap religion rghgrp rsocclas partyid2 redistrb tupower hmswrng asianimm religion rage unionsa raceorig tenure2 tea

	// Newspaper readership dummy: 1 when the raw item equals 1, missing when it equals 9,
	// 0 in every other case.
	rename readpap raw_readpap
	generate readpap = cond(raw_readpap == 9, ., raw_readpap == 1)
	label variable readpap "r reads newpaper 3+times per week"
	drop raw_readpap

	rename (hedqual rsocclas rsex eecimm whpaper) (edu sclass gender EUimmigration news)

	// voteleave: 1 for ecpolicy code 1; voteleave2: 1 for codes 1 or 2.
	// Both are missing when ecpolicy is ., 9 or -2, and 0 otherwise.
	generate voteleave = cond(ecpolicy == . | inlist(ecpolicy, 9, -2), ., ecpolicy == 1)
	generate voteleave2 = cond(ecpolicy == . | inlist(ecpolicy, 9, -2), ., inlist(ecpolicy, 1, 2))

	// leave_dk: 1 when ecpolicy equals 8, 0 when it is 7 or lower, missing otherwise.
	generate leave_dk = (ecpolicy == 8) if ecpolicy <= 7 | ecpolicy == 8

	// Full interview date: dateint as text, left-padded with a zero when it has seven
	// characters, then parsed in day-month-year order.
	generate dmy_txt = dateint
	tostring dmy_txt, replace format(%20.0f)
	replace dmy_txt = "0" + dmy_txt if length(dmy_txt) == 7
	generate date = date(dmy_txt, "DMY")
	format date %td
	generate month = month(date)
	generate day = day(date)
	generate const = .
	drop dmy_txt

	generate occupation = rsoc
	
	
	
***** MISV variables *****

*** party support

* party identification (0/1 dummies; a missing partyid2 gives 0)
	generate labour = (partyid2 == 2)
	generate conservative = (partyid2 == 1)

	rename partyid2 party


*** voter position

* redistribution: codes 1-5 placed on 0, .25, .5, .75, 1
	generate redist_lr = (redistrb - 1)/4 if inlist(redistrb, 1, 2, 3, 4, 5)

* trade unions (binary): tupower codes 1-2 give 1, codes 3-5 give 0
	generate unions_lr = inlist(tupower, 1, 2) if inlist(tupower, 1, 2, 3, 4, 5)

* trade unions (scale): tupower codes 1-5 placed on 1, .75, .5, .25, 0
	generate unions_lr_sc = (5 - tupower)/4 if inlist(tupower, 1, 2, 3, 4, 5)

* homosexuality (hmswrng): codes 1, 2 and 8 give 1; codes 3 and 4 give 0
	generate gay_lr = inlist(hmswrng, 1, 2, 8) if inlist(hmswrng, 1, 2, 3, 4, 8)

* opinion on asian immigration: code 2 gives 1; codes 1 and 3 give 0
	generate asianimmig_lr = (asianimm == 2) if inlist(asianimm, 1, 2, 3)


*** control variables

* religion (3 groups): 0 = none, 1 = christian, 2 = other; unlisted codes stay missing
	rename religion raw_relig
	generate religion = .
	replace religion = 0 if raw_relig == 1
	replace religion = 1 if inrange(raw_relig, 2, 8) | inrange(raw_relig, 21, 27)
	replace religion = 2 if inrange(raw_relig, 9, 14)

* religion (more categories): 0 = none, 1 = christian non-catholic, 2 = catholic, 3 = other
	generate religion_cat = .
	replace religion_cat = 0 if raw_relig == 1
	replace religion_cat = 1 if raw_relig == 2 | inrange(raw_relig, 4, 8) | inrange(raw_relig, 21, 27)
	replace religion_cat = 2 if raw_relig == 3
	replace religion_cat = 3 if inrange(raw_relig, 9, 14)
	drop raw_relig

* age (codes 98 and above are set to missing)
	rename rage age
	replace age = . if age >= 98

* union: unionsa codes 1-2 give 1, code 3 gives 0, anything else stays missing
	generate union = inlist(unionsa, 1, 2) if inlist(unionsa, 1, 2, 3)

* ethnicity (1 for raceorig codes 7-9, else 0)
	generate white = inlist(raceorig, 7, 8, 9)

* home (1 if tenure2 equals 1, else 0)
	generate ownhome = (tenure2 == 1)

* education: tea with codes 6 and 7 merged into 6; codes 8 and above become missing
	generate education = cond(inlist(tea, 6, 7), 6, tea) if tea < 8


	tempfile 94_clean
	save `94_clean'


	

// ---------- 95 file: ----------
	use "C:\your\path\to\bsa95.dta", clear

	generate year = 1995

	// Interview month: the single character five positions from the end of the date string;
	// the codes 99999999 and 99999998 give a missing month.
	generate m = real(substr(string(dateint, "%06.0f"), -5, 1)) if !inlist(dateint, 99999999, 99999998)
	generate imonth = ym(year, m)
	format imonth %tm

	// Constituency string from the censparl value label: lower-cased, blanks removed,
	// "&" written as "and", full stops removed (applied innermost first).
	decode censparl, generate(constituency)
	replace constituency = subinstr(subinstr(subinstr(lower(constituency), " ", "", .), "&", "and", .), ".", "", .)

	keep rsoc dateint m imonth eecimm whpaper ecpolicy year constituency region stregion srinc rsex partyid1 rsocclas raceorig hedqual rghclass readpap rghgrp rsocclas partyid2 redistrb homosex asianimm religion rage unionsa raceorig tenure2 tea

	// negative code -2 of rghgrp is moved to 0
	replace rghgrp = 0 if rghgrp == -2

	// Newspaper readership dummy: 1 when the raw item equals 1, missing when it equals 9,
	// 0 in every other case.
	rename readpap raw_readpap
	generate readpap = cond(raw_readpap == 9, ., raw_readpap == 1)
	label variable readpap "r reads newpaper 3+times per week"
	drop raw_readpap

	// negative code -2 of rghclass is moved to 12
	replace rghclass = 12 if rghclass == -2

	// Social class: code -3 becomes 9 and code -2 becomes -1.
	rename rsocclas sclass
	replace sclass = cond(sclass == -3, 9, -1) if inlist(sclass, -3, -2)

	rename (hedqual rsex eecimm whpaper) (edu gender EUimmigration news)

	// voteleave: 1 for ecpolicy code 1; voteleave2: 1 for codes 1 or 2.
	// Both are missing when ecpolicy is ., 9 or -2, and 0 otherwise.
	generate voteleave = cond(ecpolicy == . | inlist(ecpolicy, 9, -2), ., ecpolicy == 1)
	generate voteleave2 = cond(ecpolicy == . | inlist(ecpolicy, 9, -2), ., inlist(ecpolicy, 1, 2))

	// leave_dk: 1 when ecpolicy equals 8, 0 when it is 7 or lower, missing otherwise.
	generate leave_dk = (ecpolicy == 8) if ecpolicy <= 7 | ecpolicy == 8

	// Full interview date: dateint as text, left-padded with a zero when it has seven
	// characters, parsed in day-month-year order; the date is then rebuilt from its month
	// and day together with the survey year.
	generate dmy_txt = dateint
	tostring dmy_txt, replace format(%20.0f)
	replace dmy_txt = "0" + dmy_txt if length(dmy_txt) == 7
	generate date = date(dmy_txt, "DMY")
	format date %td
	generate month = month(date)
	generate day = day(date)
	replace date = mdy(month, day, year)
	format date %td
	generate const = .
	drop dmy_txt

	generate occupation = rsoc
	
	
	
	
	
***** MISV variables *****

*** party support

* party identification (0/1 dummies; a missing partyid2 gives 0)
	generate labour = (partyid2 == 2)
	generate conservative = (partyid2 == 1)

	rename partyid2 party


*** voter position

* redistribution: codes 1-5 placed on 0, .25, .5, .75, 1
	generate redist_lr = (redistrb - 1)/4 if inlist(redistrb, 1, 2, 3, 4, 5)

* homosexuality: codes 1, 2, 3 and 6 give 1; codes 4 and 5 give 0
	generate gay_lr = inlist(homosex, 1, 2, 3, 6) if inlist(homosex, 1, 2, 3, 4, 5, 6)

* opinion on asian immigration: code 2 gives 1; codes 1 and 3 give 0
	generate asianimmig_lr = (asianimm == 2) if inlist(asianimm, 1, 2, 3)


*** control variables

* religion (3 groups): 0 = none, 1 = christian, 2 = other; unlisted codes stay missing
	rename religion raw_relig
	generate religion = .
	replace religion = 0 if raw_relig == 1
	replace religion = 1 if inrange(raw_relig, 2, 8) | inrange(raw_relig, 21, 27)
	replace religion = 2 if inrange(raw_relig, 9, 14)

* religion (more categories): 0 = none, 1 = christian non-catholic, 2 = catholic, 3 = other
	generate religion_cat = .
	replace religion_cat = 0 if raw_relig == 1
	replace religion_cat = 1 if raw_relig == 2 | inrange(raw_relig, 4, 8) | inrange(raw_relig, 21, 27)
	replace religion_cat = 2 if raw_relig == 3
	replace religion_cat = 3 if inrange(raw_relig, 9, 14)
	drop raw_relig

* age (codes 98 and above are set to missing)
	rename rage age
	replace age = . if age >= 98

* union: unionsa codes 1-2 give 1, code 3 gives 0, anything else stays missing
	generate union = inlist(unionsa, 1, 2) if inlist(unionsa, 1, 2, 3)

* ethnicity (1 for raceorig codes 7-9, else 0)
	generate white = inlist(raceorig, 7, 8, 9)

* home (1 if tenure2 equals 1, else 0)
	generate ownhome = (tenure2 == 1)

* education: tea with codes 6 and 7 merged into 6; codes 8 and above become missing
	generate education = cond(inlist(tea, 6, 7), 6, tea) if tea < 8


	tempfile 95_clean
	save `95_clean'

	
	
	
// ---------- 96 file: ----------
	use "C:\your\path\to\bsa96_g921au.dta", clear
	
	gen year=1996

	// Interview month: the single character five positions from the end of the date string;
	// the codes 99999999 and 99999998 give a missing month.
	gen h1 = string(dateint, "%06.0f") 
	gen h2 = real(substr(h1, -5, 1)) 
	replace h2=. if dateint==99999999 | dateint==99999998
	
	gen m=h2
	gen imonth = ym(year, m)
	format imonth %tm
	drop h1 h2
	
	// constituency string from the censparl value label: blanks and full stops removed,
	// "&" written as "and"
	decode censparl, gen(constituency)
	replace constituency = subinstr(constituency," ","",.)
	replace constituency = subinstr(constituency,"&","and",.)
	replace constituency = subinstr(constituency,".","",.)
	
	*
	keep rsoc dateint m imonth eecimm whpaper ecpolicy year constituency region stregion rage rsex partyid1 rsocclas raceori2 hedqual rghclass readpap religion rghgrp rsocclas partyid2 confeel5 labfeel5 redistrb tupower gaysex asianimm conxtrme labxtrme religion rage unionsa raceori2 tenure2 tea
		
	*
	// negative codes are moved onto non-negative codes
	replace rghgrp=0 if rghgrp==-2 
	replace rghgrp=9 if rghgrp==-3 
	
	rename rsocclas sclass 
	replace sclass=9 if sclass==-3
	replace sclass=-1 if sclass==-2

	// newspaper readership dummy: 1 if the raw item is 1, missing if it is 9, else 0
	rename readpap h1 
	gen readpap=0
	replace readpap=1 if h1==1 
	replace readpap=. if h1==9 
	lab var readpap "r reads newpaper 3+times per week"
	drop h1 

	replace rghclass=12 if rghclass==-2
	replace rghclass=99 if rghclass==-3
	// (re)defines the value label named rghclass so that it covers the recoded values 12 and 99
	label define rghclass 12 "never had job" 1 "prof&man:high grade" 2 "prof&man:low grade" 3 "routine office" 4 "sales & personal" 5 "small p-b w emps" 6 "small p-b w-o emps" 7 "farmers" 8 "manual  foremen etc" 9 "skilled manual" 10 "semi, unskil manual" 11 "agric.  employees" 99 "insuffic info", replace
	
	rename hedqual edu

	rename rsex gender

	rename eecimm EUimmigration

	rename whpaper news
	replace news=0 if news==-1

	// voteleave: 1 for ecpolicy code 1; missing for ., 9 and -2; 0 otherwise
	gen voteleave=0
	replace voteleave=1 if ecpolicy==1 
	replace voteleave=. if ecpolicy==.
	replace voteleave=. if ecpolicy==9
	replace voteleave=. if ecpolicy==-2
	
	// voteleave2: as voteleave but codes 1 and 2 both count as 1. The other ecpolicy-based
	// survey years all create this variable; without it, voteleave2 would be missing for every
	// 1996 respondent once the yearly files are combined.
	// NOTE(review): confirm that ecpolicy uses the same codes in 1996 as in 1992-95.
	gen voteleave2=0
	replace voteleave2=1 if ecpolicy==1 
	replace voteleave2=1 if ecpolicy==2
	replace voteleave2=. if ecpolicy==.
	replace voteleave2=. if ecpolicy==9
	replace voteleave2=. if ecpolicy==-2
	
	// leave_dk: 1 for ecpolicy code 8, 0 for codes up to 7, missing otherwise
	gen leave_dk=0 if ecpolicy<=7
	replace leave_dk=1 if ecpolicy==8
	replace leave_dk=. if ecpolicy==.
		
	// full interview date: dateint as text, left-padded with a zero when it has seven
	// characters, then parsed in day-month-year order
	gen h1=dateint
	tostring h1, replace format(%20.0f)
	replace h1 = "0" + h1 if length(h1) == 7
	gen date = date(h1,"DMY")
	format date %td
	gen month=month(date)
	gen day=day(date)
	drop h1 
	gen const=.

	gen occupation=rsoc
	
	
***** MISV variables *****

*** party support

* disliking conservatives (confeel5) and disliking labour (labfeel5):
* codes 1-5 placed on 0, .25, .5, .75, 1; other codes stay missing
	foreach pty in con lab {
		generate `pty'hate = (`pty'feel5 - 1)/4 if inlist(`pty'feel5, 1, 2, 3, 4, 5)
	}

* party identification (0/1 dummies; a missing partyid2 gives 0)
	generate labour = (partyid2 == 2)
	generate conservative = (partyid2 == 1)

	rename partyid2 party


*** voter position

* redistribution: codes 1-5 placed on 0, .25, .5, .75, 1
	generate redist_lr = (redistrb - 1)/4 if inlist(redistrb, 1, 2, 3, 4, 5)

* trade unions (binary): tupower codes 1-2 give 1, codes 3-5 give 0
	generate unions_lr = inlist(tupower, 1, 2) if inlist(tupower, 1, 2, 3, 4, 5)

* trade unions (scale): tupower codes 1-5 placed on 1, .75, .5, .25, 0
	generate unions_lr_sc = (5 - tupower)/4 if inlist(tupower, 1, 2, 3, 4, 5)

* homosexuality (gaysex): codes 1-3 give 1, codes 4-5 give 0
	generate gay_lr = inlist(gaysex, 1, 2, 3) if inlist(gaysex, 1, 2, 3, 4, 5)

* opinion on asian immigration: code 2 gives 1; codes 1 and 3 give 0
	generate asianimmig_lr = (asianimm == 2) if inlist(asianimm, 1, 2, 3)


*** party position

* conservatives / labour perceived as extreme: code 1 gives 1, code 2 gives 0
	foreach pty in con lab {
		generate `pty'extr_p = (`pty'xtrme == 1) if inlist(`pty'xtrme, 1, 2)
	}

	
*** control variables

* religion (3 groups): 0 = none, 1 = christian, 2 = other; unlisted codes stay missing
	rename religion raw_relig
	generate religion = .
	replace religion = 0 if raw_relig == 1
	replace religion = 1 if inrange(raw_relig, 2, 8) | inrange(raw_relig, 21, 27)
	replace religion = 2 if inrange(raw_relig, 9, 14)

* religion (more categories): 0 = none, 1 = christian non-catholic, 2 = catholic, 3 = other
	generate religion_cat = .
	replace religion_cat = 0 if raw_relig == 1
	replace religion_cat = 1 if raw_relig == 2 | inrange(raw_relig, 4, 8) | inrange(raw_relig, 21, 27)
	replace religion_cat = 2 if raw_relig == 3
	replace religion_cat = 3 if inrange(raw_relig, 9, 14)
	drop raw_relig

* age (codes 98 and above are set to missing)
	rename rage age
	replace age = . if age >= 98

* union: unionsa codes 1-2 give 1, code 3 gives 0, anything else stays missing
	generate union = inlist(unionsa, 1, 2) if inlist(unionsa, 1, 2, 3)

* ethnicity (this year uses raceori2: codes 9 and 10 give 1, else 0)
	generate white = inlist(raceori2, 9, 10)

* home (1 if tenure2 equals 1, else 0)
	generate ownhome = (tenure2 == 1)

* education: tea with codes 6 and 7 merged into 6; codes 8 and above become missing
	generate education = cond(inlist(tea, 6, 7), 6, tea) if tea < 8


	tempfile 96_clean
	save `96_clean'



	

// ---------- 97 file: ----------
	use "C:\your\path\to\bsa97a.dta", clear
	
	gen year=1997

	rename iintdate dateint
	rename raceori2 raceorig

	* dateint arrives as a number; the date parsing further down treats it as DDMMYYYY
	* (7 digits when the day is below 10), so it is turned into a string first
	tostring dateint, replace 

	* interview month: read BOTH month digits, i.e. the two characters in front of the
	* four-digit year. Reading only the character at position -5 would turn
	* October/November/December into 0/1/2.
	gen m = real(substr(dateint, -6, 2))
	gen imonth = ym(year, m)
	format imonth %tm
	
	* keep only the variables used below:
	keep sector rsoc raceorig dateint m imonth whpaper ecpolicy year region stregion rsex rage partyid1 rsocclas readpap religion hedqual partyid2 tuinf2 unionsa tenure2 tea
	
	rename rsocclas sclass  
	
	* NOTE: the raw religion codes are deliberately left untouched in this part of the
	* section. The "MISV variables" part of the 97 section recodes religion from the raw
	* codes (1 = none, 2-8 / 21-27 = christian, 9-14 = other); collapsing religion to 0/1
	* here first would make that later recode operate on the wrong values.

	* newspaper readership: 1 if the raw item equals 1, missing if it equals 9, else 0
	rename readpap h1 
	gen readpap=0
	replace readpap=1 if h1==1 
	replace readpap=. if h1==9 
	lab var readpap "r reads newpaper 3+times per week"
	drop h1 

	rename hedqual edu

	* party-identification flags built from partyid1
	gen libdem=0
	replace libdem=1 if partyid1==3

	gen nonid=0
	replace nonid=1 if partyid1==10

	rename rsex gender
	rename whpaper news
	replace news=0 if news==-1

	* voteleave: 1 if ecpolicy==1; missing for ecpolicy ., 9 and -2; 0 otherwise
	gen voteleave=0
	replace voteleave=1 if ecpolicy==1 
	replace voteleave=. if ecpolicy==.
	replace voteleave=. if ecpolicy==9
	replace voteleave=. if ecpolicy==-2
	
	* voteleave2: as voteleave, but ecpolicy==2 also counts as 1
	gen voteleave2=0
	replace voteleave2=1 if ecpolicy==1 
	replace voteleave2=1 if ecpolicy==2
	replace voteleave2=. if ecpolicy==.
	replace voteleave2=. if ecpolicy==9
	replace voteleave2=. if ecpolicy==-2
	
	* leave_dk: 1 if ecpolicy==8, 0 for codes up to 7.
	* ecpolicy==-2 also satisfies "<=7", so it is set to missing explicitly, in line with
	* voteleave/voteleave2 above and with the 02 and 03 sections.
	gen leave_dk=0 if ecpolicy<=7
	replace leave_dk=1 if ecpolicy==8
	replace leave_dk=. if ecpolicy==. | ecpolicy==-2
	
	* full interview date: pad 7-character dates with a leading zero, then parse as day-month-year
	gen h1=dateint
	tostring h1, replace format(%20.0f)
	replace h1 = "0" + h1 if length(h1) == 7
	gen date = date(h1,"DMY")
	format date %td
	gen month=month(date)
	gen day=day(date)
	drop h1 

	gen occupation=rsoc

	
	


***** MISV variables *****

*** party support

* party identification: one 0/1 flag per party, built from partyid2
	gen labour = (partyid2==2)
	gen conservative = (partyid2==1)
	
	* the identification item itself is carried forward under the name "party"
	rename partyid2 party	

	
*** voter position

* trade unions: tuinf2 code 1 -> 1, codes 2 and 3 -> 0, any other code stays missing
	gen unions_lr = (tuinf2==1) if inlist(tuinf2, 1, 2, 3)

	
*** control variables

* religion: 0 = none, 1 = christian, 2 = other; codes outside the listed ranges stay missing
* NOTE(review): the ranges below only make sense if religion still holds the raw BSA codes
* at this point - confirm that nothing earlier in the 97 section has already recoded it.
	rename religion h1
	gen religion = 0 if h1==1
	replace religion = 1 if inrange(h1, 2, 8) | inrange(h1, 21, 27)
	replace religion = 2 if inrange(h1, 9, 14)
	
* religion (more categories): 0 = none, 1 = christian non-catholic, 2 = catholic, 3 = other
	gen religion_cat = 0 if h1==1
	replace religion_cat = 1 if h1==2 | inrange(h1, 4, 8) | inrange(h1, 21, 27)
	replace religion_cat = 2 if h1==3
	replace religion_cat = 3 if inrange(h1, 9, 14)
	drop h1 
	
* age: values of 98 and above are set to missing
	replace rage = . if rage >= 98
	rename rage age
	
* union: unionsa codes 1 and 2 -> 1, code 3 -> 0, any other code stays missing
	gen union = 1 if inlist(unionsa, 1, 2)
	replace union = 0 if unionsa==3
	
* ethnicity: 1 when raceorig is 9 or 10, otherwise 0 (also when raceorig is missing)
	gen white = inlist(raceorig, 9, 10)
	
* home: 1 when tenure2 equals 1, otherwise 0 (also when tenure2 is missing)
	gen ownhome = (tenure2==1)

* education: copy of tea with codes 6 and 7 pooled into 6; codes 8 and above (and missing) become missing
	gen education = cond(tea >= 8, ., cond(inlist(tea, 6, 7), 6, tea))
	
	
	
	* store the cleaned 97 file (constituencies are attached in the next step)
	tempfile 97_clean
	save "`97_clean'"

	
	/*
	The constituencies were hand-coded from the postcodes by Foos and Bischof: 
	each postcode was checked by hand against the 1997 constituencies, and a postcode 
	falling into more than one constituency was assigned to the one with most overlap.
	*/

	//the 1997 BSA file has no constituency variable, so the 1997 postcode bridge file is read in first:
	import excel "C:\your\path\to\1997_postcodes.xlsx", sheet("Sheet1") firstrow clear
	
	* only the postcode sector and the first constituency column are needed
	keep ps _constituency1
	rename (ps _constituency1) (sector constituency_1997)

	replace constituency_1997 = "barnsleywestandpenistone" if constituency_1997 == "Penistone"

	save "C:\your\path\to\1997_postcodes.dta", replace

	//back to the cleaned 1997 survey file, which gets the constituencies merged on:
	use "`97_clean'"

	* the sector spelling in the survey file has to be aligned with the bridge file before merging.
	* (a) two sectors contain a doubled blank:
	replace sector = "B8 1" if sector == "B8  1"
	replace sector = "W4 3" if sector == "W4  3"
	* (b) the sectors listed here lack the blank in front of their last character; 
	*     only these exact values are touched:
	foreach s in BD232 BS209 CF644 CV345 DL120 DY149 IG110 KA260 LL250 LL552 LS249 NG166 NR289 NR302 NW109 OX120 PE114 PO119 PO168 PO409 SK153 SO507 SW185 SY109 TA188 TN355 TS252 TS253 TS255 {
		replace sector = substr("`s'", 1, 4) + " " + substr("`s'", 5, 1) if sector == "`s'"
	}

	merge m:1 sector using "C:\your\path\to\1997_postcodes.dta"

	drop _merge sector

	rename constituency_1997 sector_constituency_1997

	* overwrite the stored 97 file with the version that carries the constituency
	tempfile 97_clean
	save "`97_clean'"

	
	
	

// ---------- 98 file: ----------
	use "C:\your\path\to\bsa98a.dta", clear

	gen year=1998
	rename intdate dateint
	
	* interview month: read BOTH month digits, i.e. the two characters in front of the
	* four-digit year (the date parsing further down treats dateint as DDMMYYYY, with
	* 7 characters when the day is below 10). Reading only the character at position -5
	* would turn October/November/December into 0/1/2.
	gen m = real(substr(dateint, -6, 2))
	gen imonth = ym(year, m)
	format imonth %tm
	
	* constituency name from the value label of pano, stripped of blanks and dots;
	* "&" is spelled out and "l'" is expanded to "liver"
	decode pano, gen(constituency)
	replace constituency = subinstr(constituency," ","",.)
	replace constituency = subinstr(constituency,"&","and",.)
	replace constituency = subinstr(constituency,"l'","liver",.)
	replace constituency = subinstr(constituency,".","",.)
	
	* keep only the variables used below:
	keep rsoc dateint m imonth constituency whpaper ecpolicy year region stregion rage rsex partyid1 rsocclas raceori2 hedqual rghclass readpap religion rghgrp partyid2 tupower unionsa tenure2 tea

	replace rghgrp=0 if rghgrp==-1 
	rename rsocclas sclass 

	* newspaper readership: 1 if the raw item equals 1, missing if it equals 9, else 0
	rename readpap h1 
	gen readpap=0
	replace readpap=1 if h1==1 
	replace readpap=. if h1==9 
	lab var readpap "r reads newpaper 3+times per week"
	drop h1 

	* move the negative rghclass codes to 12 and 99 and (re)define the matching value label
	replace rghclass=12 if rghclass==-2
	replace rghclass=99 if rghclass==-3
	label define rghclass 12 "never had job" 1 "prof&man:high grade" 2 "prof&man:low grade" 3 "routine office" 4 "sales & personal" 5 "small p-b w emps" 6 "small p-b w-o emps" 7 "farmers" 8 "manual  foremen etc" 9 "skilled manual" 10 "semi, unskil manual" 11 "agric.  employees" 99 "insuffic info", replace
	
	rename hedqual edu

	* party-identification flags built from partyid1
	gen libdem=0
	replace libdem=1 if partyid1==3

	gen nonid=0
	replace nonid=1 if partyid1==10

	rename rsex gender

	rename whpaper news
	replace news=0 if news==-1

	* voteleave: 1 if ecpolicy==1; missing for ecpolicy ., 9 and -2; 0 otherwise
	gen voteleave=0
	replace voteleave=1 if ecpolicy==1 
	replace voteleave=. if ecpolicy==.
	replace voteleave=. if ecpolicy==9
	replace voteleave=. if ecpolicy==-2
	
	* voteleave2: as voteleave, but ecpolicy==2 also counts as 1
	gen voteleave2=0
	replace voteleave2=1 if ecpolicy==1 
	replace voteleave2=1 if ecpolicy==2
	replace voteleave2=. if ecpolicy==.
	replace voteleave2=. if ecpolicy==9
	replace voteleave2=. if ecpolicy==-2
	
	* leave_dk: 1 if ecpolicy==8, 0 for codes up to 7.
	* ecpolicy==-2 also satisfies "<=7", so it is set to missing explicitly, in line with
	* voteleave/voteleave2 above and with the 02 and 03 sections.
	gen leave_dk=0 if ecpolicy<=7
	replace leave_dk=1 if ecpolicy==8
	replace leave_dk=. if ecpolicy==. | ecpolicy==-2
	
	* full interview date: pad 7-character dates with a leading zero, then parse as day-month-year
	gen h1=dateint
	tostring h1, replace format(%20.0f)
	replace h1 = "0" + h1 if length(h1) == 7
	gen date = date(h1,"DMY")
	format date %td
	gen month=month(date)
	gen day=day(date)
	drop h1 
	gen const=.

	gen occupation=rsoc
	
	
	
***** MISV variables *****

*** party support

* party identification: one 0/1 flag per party, built from partyid2
	gen labour = (partyid2==2)
	gen conservative = (partyid2==1)
	
	* the identification item itself is carried forward under the name "party"
	rename partyid2 party	

	
*** voter position

* trade unions: tupower codes 1 and 2 -> 1, codes 3 to 5 -> 0, any other code stays missing
	gen unions_lr = inlist(tupower, 1, 2) if inlist(tupower, 1, 2, 3, 4, 5)

	
*** control variables

* religion: 0 = none, 1 = christian, 2 = other; codes outside the listed ranges stay missing
* (h1 temporarily holds the incoming religion variable)
	rename religion h1
	gen religion = 0 if h1==1
	replace religion = 1 if inrange(h1, 2, 8) | inrange(h1, 21, 27)
	replace religion = 2 if inrange(h1, 9, 14)
	
* religion (more categories): 0 = none, 1 = christian non-catholic, 2 = catholic, 3 = other
	gen religion_cat = 0 if h1==1
	replace religion_cat = 1 if h1==2 | inrange(h1, 4, 8) | inrange(h1, 21, 27)
	replace religion_cat = 2 if h1==3
	replace religion_cat = 3 if inrange(h1, 9, 14)
	drop h1 
	
* age: values of 98 and above are set to missing
	replace rage = . if rage >= 98
	rename rage age
	
* union: unionsa codes 1 and 2 -> 1, code 3 -> 0, any other code stays missing
	gen union = 1 if inlist(unionsa, 1, 2)
	replace union = 0 if unionsa==3
	
* ethnicity: 1 when raceori2 is 9 or 10, otherwise 0 (also when raceori2 is missing)
	gen white = inlist(raceori2, 9, 10)
	
* home: 1 when tenure2 equals 1, otherwise 0 (also when tenure2 is missing)
	gen ownhome = (tenure2==1)

* education: copy of tea with codes 6 and 7 pooled into 6; codes 8 and above (and missing) become missing
	gen education = cond(tea >= 8, ., cond(inlist(tea, 6, 7), 6, tea))
	
	* store the cleaned 98 file for the append step
	tempfile 98_clean
	save "`98_clean'"


	
	
	
// ---------- 99 file: ----------
	use "C:\your\path\to\bsa99a.dta", clear

	gen year=1999
	rename intdate dateint
	
	* interview month: read BOTH month digits, i.e. the two characters in front of the
	* four-digit year (the date parsing further down treats dateint as DDMMYYYY, with
	* 7 characters when the day is below 10). Reading only the character at position -5
	* would turn October/November/December into 0/1/2.
	gen m = real(substr(dateint, -6, 2))
	gen imonth = ym(year, m)
	format imonth %tm
	
	* keep only the variables used below (postcode is needed for the constituency merge):
	keep postcode rsoc dateint m imonth whpaper ecpolicy year region stregion rage rsex partyid1 partyid2 rsocclas raceori2 hedqual rghclass readpap religion rghgrp unionsa tenure2 tea
	
	replace rghgrp=0 if rghgrp==-1 

	rename rsocclas sclass 

	* newspaper readership: 1 if the raw item equals 1, missing if it equals 9, else 0
	rename readpap h1 
	gen readpap=0
	replace readpap=1 if h1==1 
	replace readpap=. if h1==9 
	lab var readpap "r reads newpaper 3+times per week"
	drop h1 

	* move the negative rghclass codes to 12 and 99 and (re)define the matching value label
	replace rghclass=12 if rghclass==-2
	replace rghclass=99 if rghclass==-3
	label define rghclass 12 "never had job" 1 "prof&man:high grade" 2 "prof&man:low grade" 3 "routine office" 4 "sales & personal" 5 "small p-b w emps" 6 "small p-b w-o emps" 7 "farmers" 8 "manual  foremen etc" 9 "skilled manual" 10 "semi, unskil manual" 11 "agric.  employees" 99 "insuffic info", replace
	
	rename hedqual edu
	
	rename rsex gender

	rename whpaper news
	replace news=0 if news==-1

	* voteleave: 1 if ecpolicy==1; missing for ecpolicy ., 9 and -2; 0 otherwise
	gen voteleave=0
	replace voteleave=1 if ecpolicy==1 
	replace voteleave=. if ecpolicy==.
	replace voteleave=. if ecpolicy==9
	replace voteleave=. if ecpolicy==-2
	
	* voteleave2: as voteleave, but ecpolicy==2 also counts as 1
	gen voteleave2=0
	replace voteleave2=1 if ecpolicy==1 
	replace voteleave2=1 if ecpolicy==2
	replace voteleave2=. if ecpolicy==.
	replace voteleave2=. if ecpolicy==9
	replace voteleave2=. if ecpolicy==-2
	
	* leave_dk: 1 if ecpolicy==8, 0 for codes up to 7.
	* ecpolicy==-2 also satisfies "<=7", so it is set to missing explicitly, in line with
	* voteleave/voteleave2 above and with the 02 and 03 sections.
	gen leave_dk=0 if ecpolicy<=7
	replace leave_dk=1 if ecpolicy==8
	replace leave_dk=. if ecpolicy==. | ecpolicy==-2

	* full interview date: pad 7-character dates with a leading zero, then parse as day-month-year
	gen h1=dateint
	tostring h1, replace format(%20.0f)
	replace h1 = "0" + h1 if length(h1) == 7
	gen date = date(h1,"DMY")
	format date %td
	gen month=month(date)
	gen day=day(date)
	drop h1 

	gen const=.

	gen occupation=rsoc
	
	
	
***** MISV variables *****

*** party support

* party identification: one 0/1 flag per party, built from partyid2
	gen labour = (partyid2==2)
	gen conservative = (partyid2==1)
	
	* the identification item itself is carried forward under the name "party"
	rename partyid2 party	

	
*** control variables

* religion: 0 = none, 1 = christian, 2 = other; codes outside the listed ranges stay missing
* (h1 temporarily holds the incoming religion variable)
	rename religion h1
	gen religion = 0 if h1==1
	replace religion = 1 if inrange(h1, 2, 8) | inrange(h1, 21, 27)
	replace religion = 2 if inrange(h1, 9, 14)
	
* religion (more categories): 0 = none, 1 = christian non-catholic, 2 = catholic, 3 = other
	gen religion_cat = 0 if h1==1
	replace religion_cat = 1 if h1==2 | inrange(h1, 4, 8) | inrange(h1, 21, 27)
	replace religion_cat = 2 if h1==3
	replace religion_cat = 3 if inrange(h1, 9, 14)
	drop h1 
	
* age: values of 98 and above are set to missing
	replace rage = . if rage >= 98
	rename rage age
	
* union: unionsa codes 1 and 2 -> 1, code 3 -> 0, any other code stays missing
	gen union = 1 if inlist(unionsa, 1, 2)
	replace union = 0 if unionsa==3
	
* ethnicity: 1 when raceori2 is 9 or 10, otherwise 0 (also when raceori2 is missing)
	gen white = inlist(raceori2, 9, 10)
	
* home: 1 when tenure2 equals 1, otherwise 0 (also when tenure2 is missing)
	gen ownhome = (tenure2==1)

* education: copy of tea with codes 6 and 7 pooled into 6; codes 8 and above (and missing) become missing
	gen education = cond(tea >= 8, ., cond(inlist(tea, 6, 7), 6, tea))
	
	* store the cleaned 99 file (constituencies are attached in the next step)
	tempfile 99_clean
	save "`99_clean'"


	/*
	The constituencies were hand-coded from the postcodes by Foos and Bischof: 
	each postcode was checked by hand against the 1997 constituencies, and a postcode 
	falling into more than one constituency was assigned to the one with most overlap.
	*/

	//the 1999 postcode bridge file is read in first, because the 1999 BSA file is keyed by postcode rather than constituency:
	import excel "C:\your\path\to\1999_postcodes.xlsx", sheet("Sheet1") firstrow clear

	* only the postcode and the first constituency column are needed
	keep pd _Constituency1
	rename (pd _Constituency1) (postcode constituency_1997)

	save "C:\your\path\to\1999_postcodes.dta", replace

	//back to the cleaned 1999 survey file, which gets the 1997 constituencies merged on: 
	use "`99_clean'" 

	merge m:1 postcode using "C:\your\path\to\1999_postcodes.dta"

	drop _merge postcode

	rename constituency_1997 sector_constituency_1999

	* overwrite the stored 99 file with the version that carries the constituency
	tempfile 99_clean
	save "`99_clean'"


	
	
	
// ---------- 00 file: ----------
	use "C:\your\path\to\bsa00.dta", clear
	
	gen year=2000
	rename intdate dateint
	
	* interview month: dateint is numeric here, so it is written out as a digit string
	* first; the month is the two characters in front of the four-digit year
	gen h1 = string(dateint, "%06.0f") 
	gen h2 = real(substr(h1, -6, 2)) 
	
	gen m=h2
	gen imonth = ym(year, m)
	format imonth %tm
	drop h1 h2 
	
	* constituency name from the value label of concode, stripped of blanks and dots; "&" is spelled out
	decode concode, gen(constituency)
	replace constituency = subinstr(constituency," ","",.)
	replace constituency = subinstr(constituency,"&","and",.)
	replace constituency = subinstr(constituency,".","",.)
	
	* keep only the variables used below:
	keep rsoc dateint m imonth constituency whpaper ecpolicy year region stregion rage rsex partyid1 partyid2 rsocclas raceori2 hedqual rghclass readpap religion rghgrp unionsa tenure2 tea
	
	*
	rename rsocclas sclass 

	* newspaper readership: 1 if the raw item equals 1, missing if it equals 9, else 0
	rename readpap h1 
	gen readpap=0
	replace readpap=1 if h1==1 
	replace readpap=. if h1==9 
	lab var readpap "r reads newpaper 3+times per week"
	drop h1 

	* move the negative rghclass codes to 12 and 99 and (re)define the matching value label
	replace rghclass=12 if rghclass==-2
	replace rghclass=99 if rghclass==-3
	label define rghclass 12 "never had job" 1 "prof&man:high grade" 2 "prof&man:low grade" 3 "routine office" 4 "sales & personal" 5 "small p-b w emps" 6 "small p-b w-o emps" 7 "farmers" 8 "manual  foremen etc" 9 "skilled manual" 10 "semi, unskil manual" 11 "agric.  employees" 99 "insuffic info", replace
	
	rename hedqual edu

	rename rsex gender

	rename whpaper news
	replace news=0 if news==-1

	* voteleave: 1 if ecpolicy==1; missing for ecpolicy ., 9 and -2; 0 otherwise
	gen voteleave=0
	replace voteleave=1 if ecpolicy==1 
	replace voteleave=. if ecpolicy==.
	replace voteleave=. if ecpolicy==9
	replace voteleave=. if ecpolicy==-2
	
	* voteleave2: as voteleave, but ecpolicy==2 also counts as 1
	gen voteleave2=0
	replace voteleave2=1 if ecpolicy==1 
	replace voteleave2=1 if ecpolicy==2
	replace voteleave2=. if ecpolicy==.
	replace voteleave2=. if ecpolicy==9
	replace voteleave2=. if ecpolicy==-2
	
	* leave_dk: 1 if ecpolicy==8, 0 for codes up to 7.
	* ecpolicy==-2 also satisfies "<=7", so it is set to missing explicitly, in line with
	* voteleave/voteleave2 above and with the 02 and 03 sections.
	gen leave_dk=0 if ecpolicy<=7
	replace leave_dk=1 if ecpolicy==8
	replace leave_dk=. if ecpolicy==. | ecpolicy==-2
		
	* full interview date: pad 7-digit dates with a leading zero, then parse as day-month-year
	gen h1=dateint
	tostring h1, replace format(%20.0f)
	replace h1 = "0" + h1 if length(h1) == 7
	gen date = date(h1,"DMY")
	format date %td
	gen month=month(date)
	gen day=day(date)
	drop h1 
	gen const=.

	gen occupation=rsoc
	
	

***** MISV variables *****

*** party support

* party identification: one 0/1 flag per party, built from partyid2
	gen labour = (partyid2==2)
	gen conservative = (partyid2==1)
	
	* the identification item itself is carried forward under the name "party"
	rename partyid2 party	

	
*** control variables

* religion: 0 = none, 1 = christian, 2 = other; codes outside the listed ranges stay missing
* (h1 temporarily holds the incoming religion variable)
	rename religion h1
	gen religion = 0 if h1==1
	replace religion = 1 if inrange(h1, 2, 8) | inrange(h1, 21, 27)
	replace religion = 2 if inrange(h1, 9, 14)
	
* religion (more categories): 0 = none, 1 = christian non-catholic, 2 = catholic, 3 = other
	gen religion_cat = 0 if h1==1
	replace religion_cat = 1 if h1==2 | inrange(h1, 4, 8) | inrange(h1, 21, 27)
	replace religion_cat = 2 if h1==3
	replace religion_cat = 3 if inrange(h1, 9, 14)
	drop h1 
	
* age: values of 98 and above are set to missing
	replace rage = . if rage >= 98
	rename rage age
	
* union: unionsa codes 1 and 2 -> 1, code 3 -> 0, any other code stays missing
	gen union = 1 if inlist(unionsa, 1, 2)
	replace union = 0 if unionsa==3
	
* ethnicity: 1 when raceori2 is 9 or 10, otherwise 0 (also when raceori2 is missing)
	gen white = inlist(raceori2, 9, 10)
	
* home: 1 when tenure2 equals 1, otherwise 0 (also when tenure2 is missing)
	gen ownhome = (tenure2==1)

* education: copy of tea with codes 6 and 7 pooled into 6; codes 8 and above (and missing) become missing
	gen education = cond(tea >= 8, ., cond(inlist(tea, 6, 7), 6, tea))
	
	* store the cleaned 00 file for the append step
	tempfile 00_clean
	save "`00_clean'"


	
	
	
// ---------- 01 file: ----------
	use "C:\your\path\to\bsa01.dta", clear 
	
	gen year=2001
	rename intdate dateint
	rename srsocc1e rsocclas
	
	* interview month: the two characters in front of the four-digit year in dateint
	gen h2 = real(substr(dateint, -6, 2)) 
	gen m=h2
	gen imonth = ym(year, m)
	format imonth %tm
	drop  h2 
	
	* keep only the variables used below:
	keep rsoc2000 dateint m imonth conname whpaper ecpolicy year region stregion rage rsex partyid1 rsocclas raceori2 hedqual readpap religion rnghgrp rnsoccl partyid2 unionsa tenure2 tea
	*
	* constituency name: blanks removed, lower-cased, then the (possibly truncated)
	* "boro const" / "co const" suffixes, "&" and dots are stripped.
	* NOTE(review): "boro" is removed before "borough", so the "borough" line can never
	* match and names containing "borough" keep a trailing "ugh". This is left exactly as
	* it is because the constituency bridge file merged on later may be keyed on these
	* strings - confirm before changing the order.
	rename conname constituency
	replace constituency = subinstr(constituency," ","",.)
	replace constituency = lower(constituency)
	replace constituency = subinstr(constituency, "boroconst","", .)
	replace constituency = subinstr(constituency, "borocons","", .)
	replace constituency = subinstr(constituency, "boroco","", .)
	replace constituency = subinstr(constituency, "boroc","", .) 
	replace constituency = subinstr(constituency, "boro","", .) 
	replace constituency = subinstr(constituency, "borough","", .) 
	replace constituency = subinstr(constituency, "coconst","", .)
	replace constituency = subinstr(constituency, "cocon","", .)
	replace constituency = subinstr(constituency, "const","", .)
	replace constituency = subinstr(constituency,"&","and",.)
	replace constituency = subinstr(constituency,".","",.)
	
	* NOTE(review): the 03 and 04 sections recode rghgrp 98 -> 9, whereas this section
	* recodes 8 -> 9 - confirm against the 2001 codebook that 8 is intended here.
	rename rnghgrp rghgrp
	replace rghgrp=0 if rghgrp==-1 
	replace rghgrp=9 if rghgrp==8 

	rename  rnsoccl sclass 
	replace sclass=5 if sclass==6

	* newspaper readership: 1 if the raw item equals 1, missing if it equals 9, else 0
	rename readpap h1 
	gen readpap=0
	replace readpap=1 if h1==1 
	replace readpap=. if h1==9 
	lab var readpap "r reads newpaper 3+times per week"
	drop h1 

	rename hedqual edu

	rename rsex gender

	rename whpaper news
	replace news=0 if news==-1

	* voteleave: 1 if ecpolicy==1; missing for ecpolicy ., 9 and -2; 0 otherwise
	gen voteleave=0
	replace voteleave=1 if ecpolicy==1 
	replace voteleave=. if ecpolicy==.
	replace voteleave=. if ecpolicy==9
	replace voteleave=. if ecpolicy==-2
	
	* voteleave2: as voteleave, but ecpolicy==2 also counts as 1
	gen voteleave2=0
	replace voteleave2=1 if ecpolicy==1 
	replace voteleave2=1 if ecpolicy==2
	replace voteleave2=. if ecpolicy==.
	replace voteleave2=. if ecpolicy==9
	replace voteleave2=. if ecpolicy==-2
	
	* leave_dk: 1 if ecpolicy==8, 0 for codes up to 7.
	* ecpolicy==-2 also satisfies "<=7", so it is set to missing explicitly, in line with
	* voteleave/voteleave2 above and with the 02 and 03 sections.
	gen leave_dk=0 if ecpolicy<=7
	replace leave_dk=1 if ecpolicy==8
	replace leave_dk=. if ecpolicy==. | ecpolicy==-2
	
	* full interview date: pad 7-character dates with a leading zero, then parse as day-month-year
	gen h1=dateint
	tostring h1, replace format(%20.0f)
	replace h1 = "0" + h1 if length(h1) == 7
	gen date = date(h1,"DMY")
	format date %td
	gen month=month(date)
	gen day=day(date)
	drop h1 
	gen const=constituency

	gen occupation=rsoc2000

	
	
***** MISV variables *****

*** party support

* party identification: one 0/1 flag per party, built from partyid2
	gen labour = (partyid2==2)
	gen conservative = (partyid2==1)
	
	* the identification item itself is carried forward under the name "party"
	rename partyid2 party	

	
*** control variables

* religion: 0 = none, 1 = christian, 2 = other; codes outside the listed ranges stay missing
* (h1 temporarily holds the incoming religion variable)
	rename religion h1
	gen religion = 0 if h1==1
	replace religion = 1 if inrange(h1, 2, 8) | inrange(h1, 21, 27)
	replace religion = 2 if inrange(h1, 9, 14)
	
* religion (more categories): 0 = none, 1 = christian non-catholic, 2 = catholic, 3 = other
	gen religion_cat = 0 if h1==1
	replace religion_cat = 1 if h1==2 | inrange(h1, 4, 8) | inrange(h1, 21, 27)
	replace religion_cat = 2 if h1==3
	replace religion_cat = 3 if inrange(h1, 9, 14)
	drop h1 
	
* age: values of 98 and above are set to missing
	replace rage = . if rage >= 98
	rename rage age
	
* union: unionsa codes 1 and 2 -> 1, code 3 -> 0, any other code stays missing
	gen union = 1 if inlist(unionsa, 1, 2)
	replace union = 0 if unionsa==3
	
* ethnicity: 1 when raceori2 is 9 or 10, otherwise 0 (also when raceori2 is missing)
	gen white = inlist(raceori2, 9, 10)
	
* home: 1 when tenure2 equals 1, otherwise 0 (also when tenure2 is missing)
	gen ownhome = (tenure2==1)

* education: copy of tea with codes 6 and 7 pooled into 6; codes 8 and above (and missing) become missing
	gen education = cond(tea >= 8, ., cond(inlist(tea, 6, 7), 6, tea))
	
	* store the cleaned 01 file for the append step
	tempfile 01_clean
	save "`01_clean'"

	
	

	
// ---------- 02 file: ----------
	use "C:\your\path\to\bsa02.dta", clear 
	
	gen year=2002
	rename intdate dateint
	rename rclass rsocclas
	
	* interview month: the two characters in front of the four-digit year in dateint
	gen h2 = real(substr(dateint, -6, 2)) 
	gen m=h2
	gen imonth = ym(year, m)
	format imonth %tm
	drop  h2 
	
	* keep only the variables used below:
	keep rsoc2000 dateint m imonth conname1 whpaper ecpolicy year region stregion rage rsex partyid1 rsocclas raceori2 hedqual readpap religion rnghgrp rnsoccl partyid2 unionsa tenure2 tea
	*
	* constituency name: blanks removed, lower-cased, then the (possibly truncated)
	* "boro const" / "co const" suffixes, "&" and dots are stripped.
	* NOTE(review): "boro" is removed before "borough", so the "borough" line can never
	* match and names containing "borough" keep a trailing "ugh". This is left exactly as
	* it is because the constituency bridge file merged on later may be keyed on these
	* strings - confirm before changing the order.
	rename conname1 constituency
	replace constituency = subinstr(constituency," ","",.)
	replace constituency = lower(constituency)
	replace constituency = subinstr(constituency, "boroconst","", .)
	replace constituency = subinstr(constituency, "borocons","", .)
	replace constituency = subinstr(constituency, "boroco","", .)
	replace constituency = subinstr(constituency, "boroc","", .) 
	replace constituency = subinstr(constituency, "boro","", .) 
	replace constituency = subinstr(constituency, "borough","", .) 
	replace constituency = subinstr(constituency, "coconst","", .)
	replace constituency = subinstr(constituency, "cocon","", .)
	replace constituency = subinstr(constituency, "const","", .)
	replace constituency = subinstr(constituency,"&","and",.)
	replace constituency = subinstr(constituency,".","",.)
	
	* NOTE(review): the 03 and 04 sections recode rghgrp 98 -> 9, whereas this section
	* recodes 8 -> 9 - confirm against the 2002 codebook that 8 is intended here.
	rename rnghgrp rghgrp
	replace rghgrp=0 if rghgrp==-1 
	replace rghgrp=9 if rghgrp==8

	rename rnsoccl sclass 
	replace sclass=5 if sclass==6

	* newspaper readership: 1 if the raw item equals 1, missing if it equals 9, else 0
	rename readpap h1 
	gen readpap=0
	replace readpap=1 if h1==1 
	replace readpap=. if h1==9 
	lab var readpap "r reads newpaper 3+times per week"
	drop h1 

	rename hedqual edu
	
	rename rsex gender

	rename whpaper news
	replace news=0 if news==-1

	* voteleave: 1 if ecpolicy==1; missing for ecpolicy ., 9 and -2; 0 otherwise.
	* The ecpolicy==9 rule was absent in this section only, which left code 9 as 0 here
	* while voteleave2 below and every other yearly section set it to missing.
	gen voteleave=0
	replace voteleave=1 if ecpolicy==1 
	replace voteleave=. if ecpolicy==.
	replace voteleave=. if ecpolicy==9
	replace voteleave=. if ecpolicy==-2
	
	* voteleave2: as voteleave, but ecpolicy==2 also counts as 1
	gen voteleave2=0
	replace voteleave2=1 if ecpolicy==1 
	replace voteleave2=1 if ecpolicy==2
	replace voteleave2=. if ecpolicy==.
	replace voteleave2=. if ecpolicy==9
	replace voteleave2=. if ecpolicy==-2
	
	* leave_dk: 1 if ecpolicy==8, 0 for codes up to 7, missing for ecpolicy . and -2
	gen leave_dk=0 if ecpolicy<=7
	replace leave_dk=1 if ecpolicy==8
	replace leave_dk=. if ecpolicy==. | ecpolicy==-2 
		
	* full interview date: pad 7-character dates with a leading zero, then parse as day-month-year
	gen h1=dateint
	tostring h1, replace format(%20.0f)
	replace h1 = "0" + h1 if length(h1) == 7
	gen date = date(h1,"DMY")
	format date %td
	gen month=month(date)
	gen day=day(date)
	drop h1 
	gen const=constituency
	
	* NOTE(review): the 01 section stores rsoc2000 in "occupation"; here it goes into
	* "occupation2000", so "occupation" will be empty for 2002 after appending - confirm
	* this is intended.
	gen occupation2000=rsoc2000
	
	
	
***** MISV variables *****

*** party support

* party identification: one 0/1 flag per party, built from partyid2
	gen labour = (partyid2==2)
	gen conservative = (partyid2==1)
	
	* the identification item itself is carried forward under the name "party"
	rename partyid2 party	

	
*** control variables

* religion: 0 = none, 1 = christian, 2 = other; codes outside the listed ranges stay missing
* (h1 temporarily holds the incoming religion variable)
	rename religion h1
	gen religion = 0 if h1==1
	replace religion = 1 if inrange(h1, 2, 8) | inrange(h1, 21, 27)
	replace religion = 2 if inrange(h1, 9, 14)
	
* religion (more categories): 0 = none, 1 = christian non-catholic, 2 = catholic, 3 = other
	gen religion_cat = 0 if h1==1
	replace religion_cat = 1 if h1==2 | inrange(h1, 4, 8) | inrange(h1, 21, 27)
	replace religion_cat = 2 if h1==3
	replace religion_cat = 3 if inrange(h1, 9, 14)
	drop h1 
	
* age: values of 98 and above are set to missing
	replace rage = . if rage >= 98
	rename rage age
	
* union: unionsa codes 1 and 2 -> 1, code 3 -> 0, any other code stays missing
	gen union = 1 if inlist(unionsa, 1, 2)
	replace union = 0 if unionsa==3
	
* ethnicity: 1 when raceori2 is 9 or 10, otherwise 0 (also when raceori2 is missing)
	gen white = inlist(raceori2, 9, 10)
	
* home: 1 when tenure2 equals 1, otherwise 0 (also when tenure2 is missing)
	gen ownhome = (tenure2==1)

* education: copy of tea with codes 6 and 7 pooled into 6; codes 8 and above (and missing) become missing
	gen education = cond(tea >= 8, ., cond(inlist(tea, 6, 7), 6, tea))

	* store the cleaned 02 file for the append step
	tempfile 02_clean
	save "`02_clean'"


	
	
	
// ---------- 03 file: ----------
	use "C:\your\path\to\bsa03.dta", clear 
	
	gen year=2003
	rename intdate dateint
	
	* interview month: the two characters in front of the four-digit year in dateint
	gen m = real(substr(dateint, -6, 2))
	gen imonth = ym(year, m)
	format imonth %tm
	
	* constituency name from the value label of concode: blanks removed, "&" spelled out, dots removed
	decode concode, gen(constituency)
	replace constituency = subinstr(subinstr(subinstr(constituency, " ", "", .), "&", "and", .), ".", "", .)
	rename srsocc1e rsocclas
	
	* keep only the variables used below:
	keep rsoc90 dateint m imonth constituency whpaper ecpolicy year region stregion rage rsex partyid1 partyid2 rsocclas raceori2 hedqual readpap religion rnghgrp rnsoccl unionsa tenure2 tea
	*
	* rghgrp: code -1 becomes 0 and code 98 becomes 9
	rename rnghgrp rghgrp 
	replace rghgrp = cond(rghgrp==-1, 0, 9) if inlist(rghgrp, -1, 98)
	
	* sclass: code 6 is folded into 5
	rename rnsoccl sclass 
	replace sclass = 5 if sclass==6

	* newspaper readership: 1 if the raw item equals 1, missing if it equals 9, else 0
	rename readpap h1 
	gen readpap = cond(h1==9, ., h1==1)
	lab var readpap "r reads newpaper 3+times per week"
	drop h1 

	rename hedqual edu

	rename rsex gender

	* newspaper: code -1 becomes 0 and code 6 becomes 13
	rename whpaper news
	replace news = cond(news==-1, 0, 13) if inlist(news, -1, 6)
	
	* voteleave: 1 if ecpolicy==1; missing for ecpolicy ., 9 and -2; 0 otherwise
	gen voteleave = (ecpolicy==1)
	replace voteleave = . if ecpolicy==. | inlist(ecpolicy, 9, -2)
	
	* voteleave2: as voteleave, but ecpolicy==2 also counts as 1
	gen voteleave2 = inlist(ecpolicy, 1, 2)
	replace voteleave2 = . if ecpolicy==. | inlist(ecpolicy, 9, -2)
	
	* leave_dk: 1 if ecpolicy==8, 0 for codes up to 7 except -2, missing otherwise
	gen leave_dk = (ecpolicy==8) if (ecpolicy<=7 | ecpolicy==8) & ecpolicy!=-2
	
	* full interview date: pad 7-character dates with a leading zero, then parse as day-month-year
	gen h1 = dateint
	tostring h1, replace format(%20.0f)
	gen date = date(cond(strlen(h1)==7, "0" + h1, h1), "DMY")
	format date %td
	gen month = month(date)
	gen day = day(date)
	drop h1 
	gen const=.

	gen occupation=rsoc90
	
	
	
***** MISV variables *****

*** party support

* party identification: one 0/1 flag per party, built from partyid2
	gen labour = (partyid2==2)
	gen conservative = (partyid2==1)
	
	* the identification item itself is carried forward under the name "party"
	rename partyid2 party	

	
*** control variables

* religion: 0 = none, 1 = christian, 2 = other; codes outside the listed ranges stay missing
* (h1 temporarily holds the incoming religion variable)
	rename religion h1
	gen religion = 0 if h1==1
	replace religion = 1 if inrange(h1, 2, 8) | inrange(h1, 21, 27)
	replace religion = 2 if inrange(h1, 9, 14)
	
* religion (more categories): 0 = none, 1 = christian non-catholic, 2 = catholic, 3 = other
	gen religion_cat = 0 if h1==1
	replace religion_cat = 1 if h1==2 | inrange(h1, 4, 8) | inrange(h1, 21, 27)
	replace religion_cat = 2 if h1==3
	replace religion_cat = 3 if inrange(h1, 9, 14)
	drop h1 
	
* age: values of 98 and above are set to missing
	replace rage = . if rage >= 98
	rename rage age
	
* union: unionsa codes 1 and 2 -> 1, code 3 -> 0, any other code stays missing
	gen union = 1 if inlist(unionsa, 1, 2)
	replace union = 0 if unionsa==3
	
* ethnicity: 1 when raceori2 is 9 or 10, otherwise 0 (also when raceori2 is missing)
	gen white = inlist(raceori2, 9, 10)
	
* home: 1 when tenure2 equals 1, otherwise 0 (also when tenure2 is missing)
	gen ownhome = (tenure2==1)

* education: copy of tea with codes 6 and 7 pooled into 6; codes 8 and above (and missing) become missing
	gen education = cond(tea >= 8, ., cond(inlist(tea, 6, 7), 6, tea))
	
	* store the cleaned 03 file for the append step
	tempfile 03_clean
	save "`03_clean'"


	
	
	
// ---------- 04 file: ----------
	use "C:\your\path\to\bsa04.dta", clear 

	gen year=2004
	rename intdate dateint
	rename rclass rsocclas
	
	* interview month: the two characters in front of the four-digit year in dateint
	gen h2 = real(substr(dateint, -6, 2)) 
	gen m=h2
	gen imonth = ym(year, m)
	format imonth %tm
	drop  h2 
	
	* keep only the variables used below:
	keep rsoc90 dateint m imonth conname1 whpaper ecpolicy year region stregion rage rsex partyid1 partyid2 rsocclas raceori2 hedqual readpap religion rnghgrp rnsoccl unionsa tenure2 tea
	*
	* constituency name: blanks removed, lower-cased, then the (possibly truncated)
	* "boro const" / "co const" suffixes, "&" and dots are stripped
	rename conname1 constituency
	replace constituency = subinstr(constituency," ","",.)
	replace constituency = lower(constituency)
	replace constituency = subinstr(constituency, "boroconst","", .)
	replace constituency = subinstr(constituency, "borocons","", .)
	replace constituency = subinstr(constituency, "boroco","", .)
	replace constituency = subinstr(constituency, "boroc","", .) 
	replace constituency = subinstr(constituency, "boro","", .) 
	replace constituency = subinstr(constituency, "coconst","", .)
	replace constituency = subinstr(constituency, "cocon","", .)
	replace constituency = subinstr(constituency, "const","", .)
	replace constituency = subinstr(constituency,"&","and",.)
	replace constituency = subinstr(constituency,".","",.)
	
	* rghgrp: code -1 becomes 0 and code 98 becomes 9
	rename rnghgrp rghgrp 
	replace rghgrp=0 if rghgrp==-1 
	replace rghgrp=9 if rghgrp==98
	
	* sclass: code 6 is folded into 5
	rename rnsoccl sclass 
	replace sclass=5 if sclass==6

	* newspaper readership: 1 if the raw item equals 1, missing if it equals 9, else 0
	rename readpap h1 
	gen readpap=0
	replace readpap=1 if h1==1 
	replace readpap=. if h1==9 
	lab var readpap "r reads newpaper 3+times per week"
	drop h1 

	rename hedqual edu

	rename rsex gender

	* newspaper: code -1 becomes 0 and code 6 becomes 13
	rename whpaper news
	replace news=0 if news==-1
	replace news=13 if news==6
	
	* voteleave: 1 if ecpolicy==1; missing for ecpolicy ., 9 and -2; 0 otherwise
	gen voteleave=0
	replace voteleave=1 if ecpolicy==1 
	replace voteleave=. if ecpolicy==.
	replace voteleave=. if ecpolicy==9
	replace voteleave=. if ecpolicy==-2
	
	* voteleave2: as voteleave, but ecpolicy==2 also counts as 1
	gen voteleave2=0
	replace voteleave2=1 if ecpolicy==1 
	replace voteleave2=1 if ecpolicy==2
	replace voteleave2=. if ecpolicy==.
	replace voteleave2=. if ecpolicy==9
	replace voteleave2=. if ecpolicy==-2
	
	* leave_dk: 1 if ecpolicy==8, 0 for codes up to 7.
	* ecpolicy==-2 also satisfies "<=7", so it is set to missing explicitly, in line with
	* voteleave/voteleave2 above and with the 02 and 03 sections.
	gen leave_dk=0 if ecpolicy<=7
	replace leave_dk=1 if ecpolicy==8
	replace leave_dk=. if ecpolicy==. | ecpolicy==-2
	
	* full interview date: pad 7-character dates with a leading zero, then parse as day-month-year
	gen h1=dateint
	tostring h1, replace format(%20.0f)
	replace h1 = "0" + h1 if length(h1) == 7
	gen date = date(h1,"DMY")
	format date %td
	gen month=month(date)
	gen day=day(date)
	drop h1 

	gen const=constituency
	
	gen occupation=rsoc90

	
	
***** MISV variables *****

*** party support

* party identification: one 0/1 flag per party, built from partyid2
	gen labour = (partyid2==2)
	gen conservative = (partyid2==1)
	
	* the identification item itself is carried forward under the name "party"
	rename partyid2 party	

	
*** control variables

* religion: 0 = none, 1 = christian, 2 = other; codes outside the listed ranges stay missing
* (h1 temporarily holds the incoming religion variable)
	rename religion h1
	gen religion = 0 if h1==1
	replace religion = 1 if inrange(h1, 2, 8) | inrange(h1, 21, 27)
	replace religion = 2 if inrange(h1, 9, 14)
	
* religion (more categories): 0 = none, 1 = christian non-catholic, 2 = catholic, 3 = other
	gen religion_cat = 0 if h1==1
	replace religion_cat = 1 if h1==2 | inrange(h1, 4, 8) | inrange(h1, 21, 27)
	replace religion_cat = 2 if h1==3
	replace religion_cat = 3 if inrange(h1, 9, 14)
	drop h1 
	
* age: values of 98 and above are set to missing
	replace rage = . if rage >= 98
	rename rage age
	
* union: unionsa codes 1 and 2 -> 1, code 3 -> 0, any other code stays missing
	gen union = 1 if inlist(unionsa, 1, 2)
	replace union = 0 if unionsa==3
	
* ethnicity: 1 when raceori2 is 9 or 10, otherwise 0 (also when raceori2 is missing)
	gen white = inlist(raceori2, 9, 10)
	
* home: 1 when tenure2 equals 1, otherwise 0 (also when tenure2 is missing)
	gen ownhome = (tenure2==1)

* education: copy of tea with codes 6 and 7 pooled into 6; codes 8 and above (and missing) become missing
	gen education = cond(tea >= 8, ., cond(inlist(tea, 6, 7), 6, tea))
	
	* store the cleaned 04 file for the append step
	tempfile 04_clean
	save "`04_clean'"

*
	//the cleaned files are stacked into one dataset: start from the 83-91 cumulative file 
	//and append the yearly files one at a time, in chronological order 
	//(force lets files whose variables differ in string/numeric type be combined):
	use  "`83_91_clean'"
	foreach wave in 92 93 94 95 96 97 98 99 00 01 02 03 04 {
		append using "``wave'_clean'", force
	}
	
	* code 13 was introduced for the newspaper variable in the 03 and 04 sections; give it a label
	label define whpaper 13 "daily record ", add

	save "C:\your\path\to\BSA_merge.dta", replace
	
	
	
	

	
	
	
	
********************************************************************************
* clean_BSA
********************************************************************************

	/*
--------------------------------------------------------------------------------
This code recodes and cleans the merged BSA data in various steps. 

--------------------------------------------------------------------------------
*/

*bridge file with the 1997 constituency coding, to be merged onto the survey data:
	import excel "C:\your\path\to\1997_constituencies_RA_checks.xlsx", sheet("check") firstrow clear

	* tag and tabulate constituency names that occur more than once in the bridge file
	duplicates tag constituency, gen(dup_id)

	fre dup_id 

	save "C:\your\path\to\1997_constituencies_ready.dta", replace 

// We read in the merged BSA data 
	use "C:\your\path\to\BSA_merge.dta", clear 

	* observations with region code 1 or 4 are removed
	drop if inlist(region, 1, 4)

	*merge data with 1997 constituency matrix; survey rows without a match are dropped, 
	*except in 1997 and 1999, whose constituency comes from the postcode bridge instead: 
	merge m:1 constituency using "C:\your\path\to\1997_constituencies_ready.dta"
	drop if _merge == 1 & !inlist(year, 1997, 1999)
	drop _merge

	replace constituency_1997 = sector_constituency_1997 if year == 1997 
	replace constituency_1997 = sector_constituency_1999 if year == 1999 
	
	drop sector_constituency_199*
	*nothing after 1996 included

	* fill county within constituency in both directions: first forward in time, then backward
	sort constituency_1997 year
	bysort constituency_1997: carryforward county, replace 
	gsort constituency_1997 -year
	bysort constituency_1997: carryforward county, replace 

	*delete observations without a 1997 constituency, and those in county "Wales": 
	drop if constituency_1997=="" | county=="Wales"

	* numeric ids for constituency and county; the string constituency is kept as constituency97
	encode constituency_1997, gen(constituency97_id)
	encode county, gen(county_id)
	rename constituency_1997 constituency97

	
	*liverpool: 1 for Merseyside, 0 for any other county, missing when county is empty
	gen liverpool = (county == "Merseyside") if county != ""
	
	*Boycott began 20.04.1989, so treatment is from then on
	* dateint stamps 990489 and 999989 are removed
	* (presumably 1989 interviews with unknown day, or unknown day and month - TODO confirm)
	drop if inlist(dateint, 990489, 999989)

	* start with 1 for every interview from 1989 onwards ...
	gen before = 1 if year >= 1989

	* ... then reset the 1989 interview dates that are treated as pre-boycott.
	* The stamps are exactly the dateint values listed by hand
	* (they appear to be day-month-year stamps for March/April 1989).
	local pre_boycott ///
		990389 ///
		10389 10489 20389 30389 40389 40489 50389 50489 60389 60489 ///
		70489 80489 100489 110489 120389 120489 130489 140389 140489 ///
		150389 150489 160389 160489 170389 170489 180389 180489 ///
		190389 200389 210389 220389 230389 240389 260389 270389 ///
		280389 290389 300389 310389
	foreach stamp of local pre_boycott {
		replace before = 0 if dateint == `stamp'
	}

	* all survey years up to 1988 are pre-treatment
	replace before = 0 if year <= 1988
	
	rename before treat 
	label define hill 0 "Before Hillsborough" 1 "After Hillsborough"
	label values treat hill
	label variable treat "Hillsborough"
	
	*treatment for the Sun endorsing Labour (Appendix)
	* 1 = interviewed after March 1997, 0 = interviewed up to March 1997.
	* Stata ranks missing values above every number, so an unguarded
	* "year > 1997" or "m > 3" is also true for a missing year or month.
	* The explicit !missing() checks keep respondents with an unknown
	* interview year, or an unknown month in 1997, at missing instead of
	* silently coding them as "after endorsement".
	gen treat_endorse_Labour = .
	replace treat_endorse_Labour = 1 if (year > 1997 & !missing(year)) | (year == 1997 & m > 3 & !missing(m))
	replace treat_endorse_Labour = 0 if (year < 1997) | (year == 1997 & m <= 3)

	label define endorse_labour 0 "Before Endorsement" 1 "After Endorsement"
	label values treat_endorse_Labour endorse_labour
	label var treat_endorse_Labour "Endorsement of Labour"
	
	*clean class:
	* -1 becomes 0, 7 is pooled into 5, 8 into 4; 9, -2 and -3 are set to missing
	recode sclass (-1 = 0) (7 = 5) (8 = 4) (-3 -2 9 = .)
	label define rsocclas ///
		0 "skpd,never had job" 1 "professional" 2 "intermediate" 3 "skilled" ///
		4 "partly  skilled" 5 "unskilled" 7 "skp,look aft home" ///
		8 "armed   forces" 9 "insuffic info", replace
	label variable sclass "social class"
	
	*newspaper: 1 for news code 5, 0 for every other non-missing news code
	* (unlike the dummies below, codes 96 and 99 stay 0 here)
	gen newspaper = 0 if news != .
	replace newspaper = 1 if news == 5
	
	*create newspaper dummies: one 0/1 indicator per title, set to missing
	* when news is missing or takes the codes 96 or 99
	local titles sun mail express telegraph mirror nonreader
	local codes  5   2    1       7         3      0
	forvalues i = 1/6 {
		local title : word `i' of `titles'
		local code  : word `i' of `codes'
		gen `title' = 0 if news != .
		replace `title' = 1 if news == `code'
		replace `title' = . if inlist(news, 96, 99)
	}
	
	*clean region: rebuild region as a string from county
	* (counties not listed below keep an empty region)
	drop region 
	gen region = ""
	replace region = "East Midlands" if inlist(county, "Derbyshire", "Leicestershire", "Northamptonshire", "Nottinghamshire")
	replace region = "East of England" if inlist(county, "Bedfordshire", "Cambridgeshire", "Essex", "Hertfordshire", "Norfolk", "Suffolk")
	replace region = "North East England" if inlist(county, "Cleveland", "Durham", "Northumberland", "Tyne and Wear")
	replace region = "North West England" if inlist(county, "Cheshire", "Cumbria", "Greater Manchester", "Lancashire", "Merseyside")
	* string inlist() takes at most ten arguments, so the South East list is split;
	* the Isle of Wight also occurs with a leading or trailing blank
	replace region = "South East England" if inlist(county, "Berkshire", "Buckinghamshire", "East Sussex", "Hampshire", "Kent", "Oxfordshire", "Surrey", "West Sussex")
	replace region = "South East England" if inlist(county, "Isle of Wight", "Isle of Wight ", " Isle of Wight")
	replace region = "South West England" if inlist(county, "Avon", "Cornwall", "Devon", "Dorset", "Gloucestershire", "Somerset", "Wiltshire")
	replace region = "West Midlands" if inlist(county, "Hereford and Worcester", "Shropshire", "Staffordshire", "Warwickshire", "West Midlands")
	replace region = "Yorkshire and the Humber" if inlist(county, "Humberside", "Lincolnshire", "North Yorkshire", "South Yorkshire", "West Yorkshire")
	replace region = "Greater London" if county == "Greater London"
	
	*create nuts2 identifier from county
	* (nuts2 and year are the keys for the later merge with the nuts2-level file;
	*  counties not listed below keep an empty nuts2)
	gen nuts2 = ""

	* counties whose nuts2 name equals the county name
	* (string inlist() takes at most ten arguments, hence two statements)
	replace nuts2 = county if inlist(county, "Cheshire", "Cumbria", "Devon", "Essex", "Greater London", "Greater Manchester", "Kent", "Lancashire", "Lincolnshire")
	replace nuts2 = county if inlist(county, "Merseyside", "North Yorkshire", "South Yorkshire", "West Midlands", "West Yorkshire")

	* counties that are pooled into, or renamed to, a nuts2 area
	* NOTE(review): Avon is assigned to "Dorset and Somerset" although a
	* "... Bristol/Bath area" category exists below - confirm this is intended
	replace nuts2 = "Dorset and Somerset" if inlist(county, "Avon", "Dorset", "Somerset")
	replace nuts2 = "Bedfordshire and Hertfordshire" if inlist(county, "Bedfordshire", "Hertfordshire")
	replace nuts2 = "Berkshire, Buckinghamshire, and Oxfordshire" if inlist(county, "Berkshire", "Buckinghamshire", "Oxfordshire")
	replace nuts2 = "East Anglia" if inlist(county, "Cambridgeshire", "Norfolk", "Suffolk")
	replace nuts2 = "Tees Valley and Durham" if inlist(county, "Cleveland", "Durham")
	replace nuts2 = "Cornwall and Isles of Scilly" if county == "Cornwall"
	replace nuts2 = "Derbyshire and Nottinghamshire" if inlist(county, "Derbyshire", "Nottinghamshire")
	replace nuts2 = "Surrey, East and West Sussex" if inlist(county, "East Sussex", "Surrey", "West Sussex")
	replace nuts2 = "Gloucestershire, Wiltshire and Bristol/Bath area" if inlist(county, "Gloucestershire", "Wiltshire")
	* the region coding also matches the Isle of Wight spelled with a leading or
	* trailing blank; the same variants are matched here so that those rows do
	* not end up with a region but an empty nuts2
	replace nuts2 = "Hampshire and Isle of Wight" if inlist(county, "Hampshire", "Isle of Wight", "Isle of Wight ", " Isle of Wight")
	replace nuts2 = "Herefordshire, Worcestershire and Warwickshire" if inlist(county, "Hereford and Worcester", "Warwickshire")
	replace nuts2 = "East Riding and North Lincolnshire" if county == "Humberside"
	replace nuts2 = "Leicestershire, Rutland and Northamptonshire" if inlist(county, "Leicestershire", "Northamptonshire")
	replace nuts2 = "Northumberland and Tyne and Wear" if inlist(county, "Northumberland", "Tyne and Wear")
	replace nuts2 = "Shropshire and Staffordshire" if inlist(county, "Shropshire", "Staffordshire")

	* numeric region identifier (encode numbers the names in alphabetical order)
	encode region, gen(region_id)
	
	*create a North dummy: 
	* 1 for region_id 4, 5 and 9, 0 otherwise (including a missing region_id).
	* With all nine region names present these codes are North East England,
	* North West England and Yorkshire and the Humber - verify with
	* "label list region_id" if the set of regions ever changes.
	gen North = inlist(region_id, 4, 5, 9)
	
	*drop regional dummy from BSA which has errors: 
	drop stregion 
	
	*create variable measuring how much immigration from EU wanted (almost never asked):
	* EUimmigration 2 -> 0; 4, 8 or 10 -> 1; 3 -> 2; everything else stays missing
	gen lessimm = .
	replace lessimm = 0 if EUimmigration == 2
	replace lessimm = 1 if inlist(EUimmigration, 4, 8, 10)
	replace lessimm = 2 if EUimmigration == 3
	
	*create an identifier splitting 1989 into pre and post Hillsborough: 	
	* 1989.36 for untreated and 1989.6 for treated 1989 interviews, otherwise the survey year
	gen year_plot = cond(year == 1989 & treat == 0, 1989.36, cond(year == 1989 & treat == 1, 1989.6, year))
	
	* spillover: 1 for Cheshire, Lancashire and Greater Manchester, 0 for all
	* other counties, missing for Merseyside itself.
	* The county is spelled "Greater Manchester" everywhere else in this file
	* (region, nuts2 and adjacent coding); the previous comparison with
	* "Manchester" could never match, so that county was wrongly left at 0.
	gen spillover=0
	replace spillover=1 if county=="Cheshire" | county=="Lancashire" | county=="Greater Manchester"
	replace spillover=. if county=="Merseyside"
	
	* class dummies from rghclass; each is missing when rghclass is missing
	gen prof = inlist(rghclass, 1, 2, 3) if rghclass != .
	gen skilled = (rghclass == 9) if rghclass != .
	gen unskilled = (rghclass == 10) if rghclass != .
	
	* never worked: sclass code 0; missing when sclass is missing
	gen neverwork = (sclass == 0) if sclass != .
	
	* interaction of the Merseyside indicator with the Hillsborough treatment
	gen matchtreat = liverpool * treat
		
	* three-level class variable from sclass:
	* 0 for sclass 0, 4, 5; 1 for sclass 3; 2 for sclass 1, 2; missing otherwise
	gen lmh = cond(inlist(sclass, 0, 4, 5), 0, cond(sclass == 3, 1, cond(inlist(sclass, 1, 2), 2, .)))
	
	* female: 1 for gender code 2, 0 otherwise (a missing gender also gives 0)
	gen female = (gender == 2)

//We pool 2 survey years for plotting: 	
	gen year_two = .

	* regular pairs: the odd year y-1 and the even year y are pooled under y
	foreach y of numlist 1984 1986 1992(2)2004 {
		replace year_two = `y' if inlist(year, `y' - 1, `y')
	}

	* around Hillsborough the pooling follows the treatment split of 1989:
	* 1987 and untreated 1989 interviews -> 1989; treated 1989 interviews and 1990 -> 1990
	replace year_two = 1989 if year == 1987 | (year == 1989 & treat == 0)
	replace year_two = 1990 if (year == 1989 & treat == 1) | year == 1990

	* difference-in-differences term: Merseyside indicator times treatment
	gen did = liverpool * treat

	* newspaper groups built from the news code; codes not listed stay missing
	gen substitution = .
	replace substitution = 1 if news == 5
	replace substitution = 2 if news == 0
	replace substitution = 3 if inlist(news, 1, 2, 4, 7)
	replace substitution = 4 if inlist(news, 3, 8, 9, 10, 11)
	replace substitution = 5 if news == 94
	replace substitution = 6 if inlist(news, 6, 95, 96)
	label define substi 1 "Sun" 2 "no reader" 3 "anti EU" 4 "pro EU" 5 "local papers" 6 "other"
	label values substitution substi

	* edu: code 8 is set to missing, then the scale is reversed in place
	* (revrs is a user-written command and has to be installed)
	replace edu = . if edu == 8
	revrs edu, replace
	
	****imputation of missings with the overall sample statistic:
	* age uses the mean; edu, religion and sclass use the median.
	* For each variable the statistic is stored as mean_X / median_X and the
	* filled-in copy as imp_X.
	foreach v in age edu religion sclass {
		local stat = cond("`v'" == "age", "mean", "median")
		egen `stat'_`v' = `stat'(`v')
		gen imp_`v' = cond(`v' == ., `stat'_`v', `v')
	}
	* imputed education is blanked for survey years up to 1984
	replace imp_edu = . if year <= 1984

	*imputation a bit more complicated for month (disabled): 
	*egen mean_imonth = mean(imonth), by(year)
	*egen max_mean_imonth = max(mean_imonth), by(year)
	*gen imp_imonth=imonth 
	*imp_imonth=max_mean_imonth
	
//Merge with nuts2 infos (keys: nuts2 and year): 
	* NOTE(review): nuts2-year rows that exist only in the using file are kept
	* as extra rows without respondent data - confirm this is intended
	merge m:1 nuts2 year using "C:\your\path\to\FB_nuts2.dta", nogenerate

	*quarters: quarterly date built from survey year and interview month m
	drop month 
	gen iquarter = qofd(dofm(ym(year, m)))
	format iquarter %tq

//Create formative years infos: 
	* approximate year of birth from survey year and age
	gen birthyear = year - age
	
	* 1 if born in 1973 or later, 0 if earlier, missing if birthyear is missing
	gen formative = (birthyear >= 1973) if birthyear != .
	lab var formative "born after 1972"

//Create adjacent identifier:
	* 1 for Cheshire, Lancashire and Greater Manchester, 0 for every other county
	gen adjacent = inlist(county, "Cheshire", "Lancashire", "Greater Manchester")
	
	
	
***** recode lmh backwards (0 <-> 2, 1 stays 1; missing otherwise)
	gen lmh_reverse = 2 - lmh if inlist(lmh, 0, 1, 2)

***** recode lmh dummy (1 for lmh 0, 0 for lmh 1 or 2; missing otherwise)
	gen lmh_dum = (lmh == 0) if inlist(lmh, 0, 1, 2)

	* write the respondent-level file; it is extended and saved again further below
	save "C:\your\path\to\FB_BSA.dta", replace

//Create a timeseries to be merged with EB data: 
	* yearly mean of voteleave; preserve/restore returns to the
	* respondent-level data that was saved just above
	preserve
	collapse (mean) voteleave, by(year)
	save "C:\your\path\to\FB_timeseries.dta", replace
	restore

//Count how often a county is observed in the BSA data: 
	preserve
	* one row per county-year, then count those rows per county
	collapse (mean) voteleave, by(county_id year)
	bysort county_id: gen counter = _N
	* keep a single row per county for the m:1 merge
	by county_id: keep if _n == 1
	tempfile counter
	save `counter'
	restore

	* attach the counter to every respondent; year and voteleave from the
	* temporary file are ignored because the master data already contain them
	merge m:1 county_id using "`counter'"
	drop _merge 

//Some labeling: 
	* value labels for the three-level class variable
	label define lmh 0 "unskilled working class" 1 "skilled working class" 2 "middle class"
	label values lmh lmh

	* geography
	lab var constituency97_id "constituency 97"
	lab var constituency97 "constituency 97"
	lab var region_id "region"
	lab var region "region"
	lab var North "North"
	lab var liverpool "Merseyside"
	lab var adjacent "adjacent county (0,1)"

	* time
	lab var year "year"
	lab var year_two "two years pooled"
	lab var imonth "month"
	lab var iquarter "quarter"

	* treatment
	lab var treat "Hillsborough"
	lab var did "$\delta$ DiD"

	* respondent characteristics
	lab var sclass "social class (0-5)"
	lab var imp_sclass "social class (0-5)"
	lab var lmh "class"
	* gender is not a 0/1 female indicator: female is coded from gender==2
	* above, so the former label "female (0,1)" on gender was misleading
	lab var gender "gender (2 = female)"
	lab var female "female"
	lab var edu "education (1-7)"
	lab var imp_edu "education (1-7)"
	* age was labelled twice; only the label that finally applied is kept
	lab var age "age"
	lab var imp_age "age (18-98)"
	lab var birthyear "year of birth"
	lab var religion "religious (0,1)"
	lab var imp_religion "religious (0,1)"

	* attitudes, party identification and newspapers
	lab var leave_dk "don't knows on EEC/EU question"
	lab var voteleave "support leaving the EU (0,1)"
	lab var labour "PiD: Labour"
	lab var conservative "PiD: Conservative"
	*lab var libdem "PiD: LibDem"
	lab var substitution "newspapers: EU (1-6)"
	lab var sun "Sun reader (0,1)"

	*move the variables used in the analysis to the front of the dataset
	* (order only rearranges columns; no variable is dropped here)
	order ///
		constituency97_id constituency97 county_id county region_id region North ///
		nuts2 adjacent ///
		year dateint idate iquarter imonth m d year_two ///
		conhate labhate redist_lr unions_lr unions_lr_sc gay_lr asianimmig_lr ///
		conextr_p labextr_p labour conservative ///
		religion religion_cat age union white ownhome education ///
		imp_age birthyear formative female edu imp_edu  imp_religion ///
		sclass imp_sclass lmh lmh_reverse lmh_dum ///
		voteleave* leave_dk substitution sun ///
		pop gdp employment agriculture construction industry ///
		liverpool treat treat_endorse_Labour did matchtreat

	save "C:\your\path\to\FB_BSA.dta", replace

//Analysis for Media Influence and Spatial Voting: The Role of Perceived Party Positions is conducted in the RScripts "analysis.R" and "appendix.R"


